From 7671d2f1d5598cdf8685d19166fb7013279168d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 12 Mar 2022 00:45:26 +0100 Subject: [PATCH 001/615] Change timezone in stateless tests --- ...s_and_insert_without_explicit_database.sql | 4 +- .../0_stateless/00189_time_zones_long.sql | 144 ++++++------ .../00206_empty_array_to_single.sql | 2 +- tests/queries/0_stateless/00301_csv.sh | 4 +- ...4_json_each_row_input_with_noisy_fields.sh | 2 +- tests/queries/0_stateless/00502_sum_map.sql | 2 +- .../00506_shard_global_in_union.sql | 2 +- .../00512_fractional_time_zones.sh | 2 +- .../0_stateless/00515_enhanced_time_zones.sql | 18 +- tests/queries/0_stateless/00538_datediff.sql | 24 +- .../0_stateless/00561_storage_join.sql | 2 +- .../0_stateless/00718_format_datetime.sql | 2 +- .../00719_format_datetime_rand.sql | 2 +- .../00735_long_conditional.reference | 12 +- .../0_stateless/00735_long_conditional.sql | 96 ++++---- ...01_daylight_saving_time_hour_underflow.sql | 2 +- ...aving_time_shift_backwards_at_midnight.sql | 2 +- .../00825_protobuf_format_persons.sh | 6 +- .../0_stateless/00835_if_generic_case.sql | 24 +- .../00910_decimal_group_array_crash_3783.sql | 14 +- ...00921_datetime64_compatibility_long.python | 130 +++++------ ...21_datetime64_compatibility_long.reference | 212 +++++++++--------- .../00927_asof_join_other_types.sh | 2 +- .../00935_to_iso_week_first_year.sql | 2 +- .../0_stateless/00941_to_custom_week.sql | 10 +- .../0_stateless/00945_bloom_filter_index.sql | 46 ++-- .../01077_mutations_index_consistency.sh | 12 +- .../0_stateless/01087_storage_generate.sql | 2 +- .../01087_table_function_generate.reference | 4 +- .../01087_table_function_generate.sql | 16 +- .../0_stateless/01098_msgpack_format.sh | 2 +- .../01186_conversion_to_nullable.sql | 6 +- ...ter_rename_with_default_zookeeper_long.sql | 4 +- .../0_stateless/01269_toStartOfSecond.sql | 2 +- tests/queries/0_stateless/01273_arrow_load.sh | 2 +- .../0_stateless/01277_toUnixTimestamp64.sql | 
12 +- .../0_stateless/01280_min_map_max_map.sql | 4 +- .../0_stateless/01307_orc_output_format.sh | 2 +- .../01379_with_fill_several_columns.sql | 8 +- ...396_negative_datetime_saturate_to_zero.sql | 2 +- .../01414_low_cardinality_nullable.sql | 30 +-- ..._parse_date_time_best_effort_timestamp.sql | 6 +- .../01440_to_date_monotonicity.sql | 8 +- .../01442_date_time_with_params.reference | 6 +- .../01442_date_time_with_params.sql | 6 +- .../01508_partition_pruning_long.queries | 38 ++-- .../01508_partition_pruning_long.reference | 4 +- .../01516_date_time_output_format.sql | 8 +- .../0_stateless/01582_any_join_supertype.sql | 2 +- .../01615_two_args_function_index_fix.sql | 2 +- .../0_stateless/01676_reinterpret_as.sql | 4 +- .../01691_DateTime64_clamp.reference | 26 +-- .../0_stateless/01691_DateTime64_clamp.sql | 26 +-- .../01692_DateTime64_from_DateTime.sql | 6 +- .../0_stateless/01698_fix_toMinute.reference | 2 +- .../0_stateless/01698_fix_toMinute.sql | 6 +- .../01699_timezoneOffset.reference | 4 +- .../0_stateless/01699_timezoneOffset.sql | 16 +- .../01702_toDateTime_from_string_clamping.sql | 8 +- ...732_more_consistent_datetime64_parsing.sql | 8 +- .../01734_datetime64_from_float.sql | 6 +- .../0_stateless/01761_round_year_bounds.sql | 2 +- .../0_stateless/01769_extended_range_2.sql | 2 +- .../01772_to_start_of_hour_align.sql | 4 +- ...ormatDateTime_DateTime64_century.reference | 24 +- ...1802_formatDateTime_DateTime64_century.sql | 24 +- .../01802_toDateTime64_large_values.reference | 6 +- .../01802_toDateTime64_large_values.sql | 6 +- tests/queries/0_stateless/01811_datename.sql | 8 +- .../0_stateless/01821_to_date_time_ubsan.sql | 4 +- .../0_stateless/01852_map_combinator.sql | 2 +- ...1867_support_datetime64_version_column.sql | 2 +- .../01868_order_by_fill_with_datetime64.sql | 4 +- .../0_stateless/01891_partition_hash.sql | 2 +- .../01891_partition_hash_no_long_int.sql | 2 +- .../0_stateless/01905_to_json_string.sql | 2 +- 
.../0_stateless/01921_datatype_date32.sql | 18 +- .../01925_date_date_time_comparison.sql | 4 +- .../01926_date_date_time_supertype.reference | 14 +- .../01926_date_date_time_supertype.sql | 10 +- ...nversion_between_date32_and_datetime64.sql | 2 +- .../02096_date_time_1970_saturation.sql | 24 +- .../02176_toStartOfWeek_overflow_pruning.sql | 2 +- .../02184_default_table_engine.sql | 4 +- .../alltypes_list.parquet.columns | 2 +- ...1.column-metadata-handling.parquet.columns | 2 +- .../00900_parquet_create_table_columns.py | 4 +- 87 files changed, 624 insertions(+), 624 deletions(-) diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql index 2fd097b9538..f3130f24521 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql @@ -8,8 +8,8 @@ DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_view; DROP TABLE IF EXISTS test_view_filtered; -CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Europe/Moscow'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); -CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Europe/Moscow')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Asia/Istanbul'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Asia/Istanbul')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, CounterID UInt32) ENGINE = Memory POPULATE AS SELECT 
CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); diff --git a/tests/queries/0_stateless/00189_time_zones_long.sql b/tests/queries/0_stateless/00189_time_zones_long.sql index ecc5f62ed1d..a25c9c7a415 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.sql +++ b/tests/queries/0_stateless/00189_time_zones_long.sql @@ -1,12 +1,12 @@ -- Tags: long -/* timestamp 1419800400 == 2014-12-29 00:00:00 (Europe/Moscow) */ -/* timestamp 1412106600 == 2014-09-30 23:50:00 (Europe/Moscow) */ -/* timestamp 1420102800 == 2015-01-01 12:00:00 (Europe/Moscow) */ -/* timestamp 1428310800 == 2015-04-06 12:00:00 (Europe/Moscow) */ -/* timestamp 1436956200 == 2015-07-15 13:30:00 (Europe/Moscow) */ -/* timestamp 1426415400 == 2015-03-15 13:30:00 (Europe/Moscow) */ -/* timestamp 1549483055 == 2019-02-06 22:57:35 (Europe/Moscow) */ +/* timestamp 1419800400 == 2014-12-28 23:00:00 (Asia/Istanbul) */ +/* timestamp 1412106600 == 2014-09-30 22:50:00 (Asia/Istanbul) */ +/* timestamp 1420102800 == 2015-01-01 11:00:00 (Asia/Istanbul) */ +/* timestamp 1428310800 == 2015-04-06 12:00:00 (Asia/Istanbul) */ +/* timestamp 1436956200 == 2015-07-15 13:30:00 (Asia/Istanbul) */ +/* timestamp 1426415400 == 2015-03-15 12:30:00 (Asia/Istanbul) */ +/* timestamp 1549483055 == 2019-02-06 22:57:35 (Asia/Istanbul) */ /* date 16343 == 2014-09-30 */ /* date 16433 == 2014-12-29 */ /* date 17933 == 2019-02-06 */ @@ -14,12 +14,12 @@ /* toStartOfDay */ SELECT 'toStartOfDay'; -SELECT toStartOfDay(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toStartOfDay(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toStartOfDay(toDateTime(1412106600), 'Europe/Paris'); SELECT toStartOfDay(toDateTime(1412106600), 'Europe/London'); SELECT toStartOfDay(toDateTime(1412106600), 'Asia/Tokyo'); SELECT toStartOfDay(toDateTime(1412106600), 'Pacific/Pitcairn'); -SELECT toStartOfDay(toDate(16343), 'Europe/Moscow'); +SELECT 
toStartOfDay(toDate(16343), 'Asia/Istanbul'); SELECT toStartOfDay(toDate(16343), 'Europe/Paris'); SELECT toStartOfDay(toDate(16343), 'Europe/London'); SELECT toStartOfDay(toDate(16343), 'Asia/Tokyo'); @@ -28,7 +28,7 @@ SELECT toStartOfDay(toDate(16343), 'Pacific/Pitcairn'); /* toMonday */ SELECT 'toMonday'; -SELECT toMonday(toDateTime(1419800400), 'Europe/Moscow'); +SELECT toMonday(toDateTime(1419800400), 'Asia/Istanbul'); SELECT toMonday(toDateTime(1419800400), 'Europe/Paris'); SELECT toMonday(toDateTime(1419800400), 'Europe/London'); SELECT toMonday(toDateTime(1419800400), 'Asia/Tokyo'); @@ -42,7 +42,7 @@ SELECT toMonday(toDate(16433)); /* toStartOfMonth */ SELECT 'toStartOfMonth'; -SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/Moscow'); +SELECT toStartOfMonth(toDateTime(1419800400), 'Asia/Istanbul'); SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/Paris'); SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/London'); SELECT toStartOfMonth(toDateTime(1419800400), 'Asia/Tokyo'); @@ -56,7 +56,7 @@ SELECT toStartOfMonth(toDate(16433)); /* toStartOfQuarter */ SELECT 'toStartOfQuarter'; -SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toStartOfQuarter(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/Paris'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/London'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Asia/Tokyo'); @@ -70,7 +70,7 @@ SELECT toStartOfQuarter(toDate(16343)); /* toStartOfYear */ SELECT 'toStartOfYear'; -SELECT toStartOfYear(toDateTime(1419800400), 'Europe/Moscow'); +SELECT toStartOfYear(toDateTime(1419800400), 'Asia/Istanbul'); SELECT toStartOfYear(toDateTime(1419800400), 'Europe/Paris'); SELECT toStartOfYear(toDateTime(1419800400), 'Europe/London'); SELECT toStartOfYear(toDateTime(1419800400), 'Asia/Tokyo'); @@ -84,7 +84,7 @@ SELECT toStartOfYear(toDate(16433)); /* toTime */ SELECT 'toTime'; -SELECT toString(toTime(toDateTime(1420102800), 
'Europe/Moscow'), 'Europe/Moscow'), toString(toTime(toDateTime(1428310800), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toTime(toDateTime(1420102800), 'Asia/Istanbul'), 'Asia/Istanbul'), toString(toTime(toDateTime(1428310800), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toTime(toDateTime(1420102800), 'Europe/Paris'), 'Europe/Paris'), toString(toTime(toDateTime(1428310800), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toTime(toDateTime(1420102800), 'Europe/London'), 'Europe/London'), toString(toTime(toDateTime(1428310800), 'Europe/London'), 'Europe/London'); SELECT toString(toTime(toDateTime(1420102800), 'Asia/Tokyo'), 'Asia/Tokyo'), toString(toTime(toDateTime(1428310800), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -93,7 +93,7 @@ SELECT toString(toTime(toDateTime(1420102800), 'Pacific/Pitcairn'), 'Pacific/Pit /* toYear */ SELECT 'toYear'; -SELECT toYear(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toYear(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toYear(toDateTime(1412106600), 'Europe/Paris'); SELECT toYear(toDateTime(1412106600), 'Europe/London'); SELECT toYear(toDateTime(1412106600), 'Asia/Tokyo'); @@ -102,7 +102,7 @@ SELECT toYear(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toMonth */ SELECT 'toMonth'; -SELECT toMonth(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toMonth(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toMonth(toDateTime(1412106600), 'Europe/Paris'); SELECT toMonth(toDateTime(1412106600), 'Europe/London'); SELECT toMonth(toDateTime(1412106600), 'Asia/Tokyo'); @@ -111,7 +111,7 @@ SELECT toMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfMonth */ SELECT 'toDayOfMonth'; -SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toDayOfMonth(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/Paris'); SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/London'); SELECT toDayOfMonth(toDateTime(1412106600), 'Asia/Tokyo'); @@ -120,7 +120,7 @@ SELECT 
toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfWeek */ SELECT 'toDayOfWeek'; -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris'); SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London'); SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo'); @@ -129,7 +129,7 @@ SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toHour */ SELECT 'toHour'; -SELECT toHour(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toHour(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toHour(toDateTime(1412106600), 'Europe/Paris'); SELECT toHour(toDateTime(1412106600), 'Europe/London'); SELECT toHour(toDateTime(1412106600), 'Asia/Tokyo'); @@ -138,7 +138,7 @@ SELECT toHour(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toMinute */ SELECT 'toMinute'; -SELECT toMinute(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toMinute(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toMinute(toDateTime(1412106600), 'Europe/Paris'); SELECT toMinute(toDateTime(1412106600), 'Europe/London'); SELECT toMinute(toDateTime(1412106600), 'Asia/Tokyo'); @@ -147,7 +147,7 @@ SELECT toMinute(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toSecond */ SELECT 'toSecond'; -SELECT toSecond(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toSecond(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toSecond(toDateTime(1412106600), 'Europe/Paris'); SELECT toSecond(toDateTime(1412106600), 'Europe/London'); SELECT toSecond(toDateTime(1412106600), 'Asia/Tokyo'); @@ -156,7 +156,7 @@ SELECT toSecond(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toStartOfMinute */ SELECT 'toStartOfMinute'; -SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/Paris'), 
'Europe/Paris'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -165,7 +165,7 @@ SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Pa /* toStartOfFiveMinute */ SELECT 'toStartOfFiveMinute'; -SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -174,7 +174,7 @@ SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Pacific/Pitcairn'), /* toStartOfTenMinutes */ SELECT 'toStartOfTenMinutes'; -SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -183,7 +183,7 @@ SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Pacific/Pitcairn'), /* toStartOfFifteenMinutes */ SELECT 'toStartOfFifteenMinutes'; -SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 
'Europe/London'), 'Europe/London'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -192,7 +192,7 @@ SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Pacific/Pitcair /* toStartOfHour */ SELECT 'toStartOfHour'; -SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfHour(toDateTime(1549483055), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); @@ -201,33 +201,33 @@ SELECT toString(toStartOfHour(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Paci /* toStartOfInterval */ SELECT 'toStartOfInterval'; -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 year, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 year, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 year, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 quarter, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 quarter, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 3 quarter, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 month, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 month, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 month, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 week, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 week, 'Europe/Moscow'); -SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 6 week, 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 day, 'Europe/Moscow'), 
'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 6 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 24 hour, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 20 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 90 minute, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 second, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 second, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 second, 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 year, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 year, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 year, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 quarter, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), 
INTERVAL 2 quarter, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 3 quarter, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 month, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 month, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 month, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 week, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 week, 'Asia/Istanbul'); +SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 6 week, 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 6 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 24 hour, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 20 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 90 minute, 'Asia/Istanbul'), 'Asia/Istanbul'); 
+SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 1 second, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 2 second, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDateTime(1549483055), INTERVAL 5 second, 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toStartOfInterval(toDate(17933), INTERVAL 1 year); SELECT toStartOfInterval(toDate(17933), INTERVAL 2 year); SELECT toStartOfInterval(toDate(17933), INTERVAL 5 year); @@ -240,14 +240,14 @@ SELECT toStartOfInterval(toDate(17933), INTERVAL 5 month); SELECT toStartOfInterval(toDate(17933), INTERVAL 1 week); SELECT toStartOfInterval(toDate(17933), INTERVAL 2 week); SELECT toStartOfInterval(toDate(17933), INTERVAL 6 week); -SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 1 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 2 day, 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 5 day, 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 1 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 2 day, 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 5 day, 'Asia/Istanbul'), 'Asia/Istanbul'); /* toRelativeYearNum */ SELECT 'toRelativeYearNum'; -SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeYearNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeYearNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeYearNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeYearNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/London') - toRelativeYearNum(toDateTime(0), 'Europe/London'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Asia/Tokyo') - 
toRelativeYearNum(toDateTime(0), 'Asia/Tokyo'); @@ -256,7 +256,7 @@ SELECT toRelativeYearNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeMonthNum */ SELECT 'toRelativeMonthNum'; -SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeMonthNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeMonthNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeMonthNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeMonthNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/London') - toRelativeMonthNum(toDateTime(0), 'Europe/London'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeMonthNum(toDateTime(0), 'Asia/Tokyo'); @@ -265,7 +265,7 @@ SELECT toRelativeMonthNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelati /* toRelativeWeekNum */ SELECT 'toRelativeWeekNum'; -SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeWeekNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeWeekNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeWeekNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeWeekNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/London') - toRelativeWeekNum(toDateTime(0), 'Europe/London'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeWeekNum(toDateTime(0), 'Asia/Tokyo'); @@ -274,7 +274,7 @@ SELECT toRelativeWeekNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeDayNum */ SELECT 'toRelativeDayNum'; -SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeDayNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeDayNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeDayNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/Paris') - 
toRelativeDayNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/London') - toRelativeDayNum(toDateTime(0), 'Europe/London'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeDayNum(toDateTime(0), 'Asia/Tokyo'); @@ -284,7 +284,7 @@ SELECT toUInt16(toRelativeDayNum(toDateTime(1412106600), 'Pacific/Pitcairn') - t /* toRelativeHourNum */ SELECT 'toRelativeHourNum'; -SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeHourNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeHourNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeHourNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeHourNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/London') - toRelativeHourNum(toDateTime(0), 'Europe/London'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeHourNum(toDateTime(0), 'Asia/Tokyo'); @@ -293,7 +293,7 @@ SELECT toRelativeHourNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeMinuteNum */ SELECT 'toRelativeMinuteNum'; -SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeMinuteNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeMinuteNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeMinuteNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/London') - toRelativeMinuteNum(toDateTime(0), 'Europe/London'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeMinuteNum(toDateTime(0), 'Asia/Tokyo'); @@ -302,7 +302,7 @@ SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelat /* toRelativeSecondNum */ SELECT 'toRelativeSecondNum'; -SELECT toRelativeSecondNum(toDateTime(1412106600), 
'Europe/Moscow') - toRelativeSecondNum(toDateTime(0), 'Europe/Moscow'); +SELECT toRelativeSecondNum(toDateTime(1412106600), 'Asia/Istanbul') - toRelativeSecondNum(toDateTime(0), 'Asia/Istanbul'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeSecondNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/London') - toRelativeSecondNum(toDateTime(0), 'Europe/London'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeSecondNum(toDateTime(0), 'Asia/Tokyo'); @@ -311,13 +311,13 @@ SELECT toRelativeSecondNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelat /* toDate */ SELECT 'toDate'; -SELECT toDate(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toDate(toDateTime(1412106600), 'Asia/Istanbul'); SELECT toDate(toDateTime(1412106600), 'Europe/Paris'); SELECT toDate(toDateTime(1412106600), 'Europe/London'); SELECT toDate(toDateTime(1412106600), 'Asia/Tokyo'); SELECT toDate(toDateTime(1412106600), 'Pacific/Pitcairn'); -SELECT toDate(1412106600, 'Europe/Moscow'); +SELECT toDate(1412106600, 'Asia/Istanbul'); SELECT toDate(1412106600, 'Europe/Paris'); SELECT toDate(1412106600, 'Europe/London'); SELECT toDate(1412106600, 'Asia/Tokyo'); @@ -328,7 +328,7 @@ SELECT toDate(16343); /* toString */ SELECT 'toString'; -SELECT toString(toDateTime(1436956200), 'Europe/Moscow'); +SELECT toString(toDateTime(1436956200), 'Asia/Istanbul'); SELECT toString(toDateTime(1436956200), 'Europe/Paris'); SELECT toString(toDateTime(1436956200), 'Europe/London'); SELECT toString(toDateTime(1436956200), 'Asia/Tokyo'); @@ -337,13 +337,13 @@ SELECT toString(toDateTime(1436956200), 'Pacific/Pitcairn'); /* toUnixTimestamp */ SELECT 'toUnixTimestamp'; -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Moscow'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Paris'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 
'Europe/London'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Asia/Tokyo'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Pacific/Pitcairn'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Asia/Istanbul'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Europe/Paris'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Europe/London'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Asia/Tokyo'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Pacific/Pitcairn'); -SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Istanbul'), 'Asia/Istanbul'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Paris'), 'Europe/Paris'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/London'), 'Europe/London'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Asia/Tokyo'), 'Asia/Tokyo'); diff --git a/tests/queries/0_stateless/00206_empty_array_to_single.sql b/tests/queries/0_stateless/00206_empty_array_to_single.sql index 0e3ff4f3537..85e8f82436d 100644 --- a/tests/queries/0_stateless/00206_empty_array_to_single.sql +++ b/tests/queries/0_stateless/00206_empty_array_to_single.sql @@ -1,5 +1,5 @@ SELECT emptyArrayToSingle(arrayFilter(x -> x != 99, arrayJoin([[1, 2], [99], [4, 5, 6]]))); -SELECT emptyArrayToSingle(emptyArrayString()), emptyArrayToSingle(emptyArrayDate()), emptyArrayToSingle(arrayFilter(x -> 0, [now('Europe/Moscow')])); +SELECT emptyArrayToSingle(emptyArrayString()), emptyArrayToSingle(emptyArrayDate()), emptyArrayToSingle(arrayFilter(x -> 0, [now('Asia/Istanbul')])); SELECT emptyArrayToSingle(range(number % 3)), diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 50c64b312a7..b2618343dc0 
100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -18,7 +18,7 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Europe/Moscow'), s String) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Asia/Istanbul'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" 2016-01-02 01:02:03, "2" @@ -29,7 +29,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Europe/Moscow')), s Nullable(String)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Asia/Istanbul')), s Nullable(String)) ENGINE = Memory"; echo 'NULL, NULL "2016-01-01 01:02:03",NUL diff --git a/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh b/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh index 91ca0d7d869..f559b9d75bd 100755 --- a/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh +++ b/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh @@ -26,7 +26,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS json_noisy" echo $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS json_each_row" -$CLICKHOUSE_CLIENT -q "CREATE TABLE json_each_row (d DateTime('Europe/Moscow')) ENGINE = Memory" +$CLICKHOUSE_CLIENT -q "CREATE TABLE json_each_row (d DateTime('Asia/Istanbul')) ENGINE = Memory" echo '{"d" : "2017-08-31 18:36:48", "t" : ""} {"d" : "1504193808", "t" : -1} {"d" : 1504193808, "t" : []} diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index 3ceb5b82952..acc87cc5f16 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -31,7 +31,7 @@ select 
sumMap(val, cnt) from ( SELECT [ CAST(1, 'UInt64') ] as val, [1] as cnt ) select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Float64') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST('a', 'Enum16(\'a\'=1)') ] as val, [1] as cnt ); -select sumMap(val, cnt) from ( SELECT [ CAST(1, 'DateTime(\'Europe/Moscow\')') ] as val, [1] as cnt ); +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'DateTime(\'Asia/Istanbul\')') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Date') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST('01234567-89ab-cdef-0123-456789abcdef', 'UUID') ] as val, [1] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1] as cnt ); diff --git a/tests/queries/0_stateless/00506_shard_global_in_union.sql b/tests/queries/0_stateless/00506_shard_global_in_union.sql index b3009add7e5..e51c18c5678 100644 --- a/tests/queries/0_stateless/00506_shard_global_in_union.sql +++ b/tests/queries/0_stateless/00506_shard_global_in_union.sql @@ -22,7 +22,7 @@ DROP TABLE IF EXISTS union_bug; CREATE TABLE union_bug ( Event String, - Datetime DateTime('Europe/Moscow') + Datetime DateTime('Asia/Istanbul') ) Engine = Memory; INSERT INTO union_bug VALUES ('A', 1), ('B', 2); diff --git a/tests/queries/0_stateless/00512_fractional_time_zones.sh b/tests/queries/0_stateless/00512_fractional_time_zones.sh index 45be8fe8d17..eb459d22704 100755 --- a/tests/queries/0_stateless/00512_fractional_time_zones.sh +++ b/tests/queries/0_stateless/00512_fractional_time_zones.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TZ=Europe/Moscow ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" +TZ=Asia/Istanbul ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" TZ=Asia/Colombo ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" TZ=Asia/Kathmandu ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')" diff --git a/tests/queries/0_stateless/00515_enhanced_time_zones.sql b/tests/queries/0_stateless/00515_enhanced_time_zones.sql index cae487dfab6..e555b6b87bc 100644 --- a/tests/queries/0_stateless/00515_enhanced_time_zones.sql +++ b/tests/queries/0_stateless/00515_enhanced_time_zones.sql @@ -1,6 +1,6 @@ -SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 1, 'Asia/Kolkata'); +SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 1, 'Asia/Kolkata'); SELECT addMonths(toDateTime('2017-11-05 10:37:47', 'Asia/Kolkata'), 1); -SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata'), 1); +SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'), 1); SELECT addMonths(toDateTime('2017-11-05 08:07:47'), 1); SELECT addMonths(materialize(toDateTime('2017-11-05 08:07:47')), 1); @@ -12,11 +12,11 @@ SELECT addMonths(materialize(toDateTime('2017-11-05 08:07:47')), -1); SELECT addMonths(toDateTime('2017-11-05 08:07:47'), materialize(-1)); SELECT addMonths(materialize(toDateTime('2017-11-05 08:07:47')), materialize(-1)); -SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Europe/Moscow'); -SELECT toUnixTimestamp(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Europe/Moscow'); +SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Istanbul'); +SELECT toUnixTimestamp(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Istanbul'); -SELECT toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'); -SELECT toTimeZone(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata'); -SELECT 
toString(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow')); -SELECT toString(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata')); -SELECT toString(toDateTime('2017-11-05 08:07:47', 'Europe/Moscow'), 'Asia/Kolkata'); +SELECT toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'); +SELECT toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'); +SELECT toString(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul')); +SELECT toString(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata')); +SELECT toString(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'); diff --git a/tests/queries/0_stateless/00538_datediff.sql b/tests/queries/0_stateless/00538_datediff.sql index b76ab4ff3f8..5dc416ad0bd 100644 --- a/tests/queries/0_stateless/00538_datediff.sql +++ b/tests/queries/0_stateless/00538_datediff.sql @@ -43,12 +43,12 @@ SELECT DATEDIFF('year', today(), today() - INTERVAL 10 YEAR); SELECT 'Dependance of timezones'; -SELECT dateDiff('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); -SELECT dateDiff('second', toDate('2014-10-26'), toDate('2014-10-27'), 'Europe/Moscow'); +SELECT dateDiff('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT dateDiff('second', 
toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); SELECT dateDiff('month', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); SELECT dateDiff('week', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); @@ -57,12 +57,12 @@ SELECT dateDiff('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); SELECT dateDiff('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); SELECT dateDiff('second', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT dateDiff('month', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('week', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('day', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('hour', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('minute', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); -SELECT dateDiff('second', toDateTime('2014-10-26 00:00:00', 'Europe/Moscow'), toDateTime('2014-10-27 00:00:00', 'Europe/Moscow')); +SELECT dateDiff('month', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('week', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('day', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('hour', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('minute', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT dateDiff('second', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), 
toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); SELECT dateDiff('month', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); SELECT dateDiff('week', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); diff --git a/tests/queries/0_stateless/00561_storage_join.sql b/tests/queries/0_stateless/00561_storage_join.sql index ad4fab09c20..913ecec6f4a 100644 --- a/tests/queries/0_stateless/00561_storage_join.sql +++ b/tests/queries/0_stateless/00561_storage_join.sql @@ -1,7 +1,7 @@ drop table IF EXISTS joinbug; CREATE TABLE joinbug ( - event_date Date MATERIALIZED toDate(created, 'Europe/Moscow'), + event_date Date MATERIALIZED toDate(created, 'Asia/Istanbul'), id UInt64, id2 UInt64, val UInt64, diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index cd679fe9735..899e23f9da1 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -42,4 +42,4 @@ SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), 'no formatting pattern' SELECT formatDateTime(toDate('2018-01-01'), '%F %T'); SELECT formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'UTC'), - formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'Europe/Moscow') \ No newline at end of file + formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'Asia/Istanbul') \ No newline at end of file diff --git a/tests/queries/0_stateless/00719_format_datetime_rand.sql b/tests/queries/0_stateless/00719_format_datetime_rand.sql index b91c988e1ca..ee596912bc7 100644 --- a/tests/queries/0_stateless/00719_format_datetime_rand.sql +++ b/tests/queries/0_stateless/00719_format_datetime_rand.sql @@ -8,4 +8,4 @@ WITH toDate(today() + rand() % 4096) AS t SELECT count() FROM numbers(1000000) W -- Note: in some other timezones, daylight saving time change happens in midnight, so the first time of day 
is 01:00:00 instead of 00:00:00. -- Stick to Moscow timezone to avoid this issue. -WITH toDate(today() + rand() % 4096) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%F %T', 'Europe/Moscow') != toString(toDateTime(t, 'Europe/Moscow')); +WITH toDate(today() + rand() % 4096) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%F %T', 'Asia/Istanbul') != toString(toDateTime(t, 'Asia/Istanbul')); diff --git a/tests/queries/0_stateless/00735_long_conditional.reference b/tests/queries/0_stateless/00735_long_conditional.reference index 082c2d49de9..f6c06e64066 100644 --- a/tests/queries/0_stateless/00735_long_conditional.reference +++ b/tests/queries/0_stateless/00735_long_conditional.reference @@ -92,9 +92,9 @@ value vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') +1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') column vs value 0 1 1 Int8 Int8 Int8 0 1 1 Int8 Int16 Int16 @@ -189,6 +189,6 @@ column vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') -2000-01-01 
00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') +1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') diff --git a/tests/queries/0_stateless/00735_long_conditional.sql b/tests/queries/0_stateless/00735_long_conditional.sql index 73febad1310..662c87db48f 100644 --- a/tests/queries/0_stateless/00735_long_conditional.sql +++ b/tests/queries/0_stateless/00735_long_conditional.sql @@ -15,7 +15,7 @@ SELECT toInt8(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt8(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt8(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt8(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt8(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -31,7 +31,7 @@ SELECT toInt16(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt16(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt16(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt16(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt16(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -47,7 +47,7 @@ SELECT toInt32(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt32(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt32(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt32(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt32(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -63,7 +63,7 @@ SELECT toInt64(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt64(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toInt64(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toInt64(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt64(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt64(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -79,7 +79,7 @@ SELECT toUInt8(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toUInt8(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt8(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt8(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt8(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -95,7 +95,7 @@ SELECT toUInt16(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt16(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt16(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt16(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt16(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -111,7 +111,7 @@ SELECT toUInt32(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt32(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toFloat64(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt32(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt32(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt32(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -127,7 +127,7 @@ SELECT toUInt64(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt64(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUInt64(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toUInt64(0) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt64(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt64(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -143,26 +143,26 @@ SELECT toDate(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toDate(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDate(0) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt8(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt16(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDecimal64(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT toDateTime(0, 'Asia/Istanbul') AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT 'column vs value'; @@ -177,7 +177,7 @@ SELECT materialize(toInt8(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, to SELECT materialize(toInt8(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt8(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt8(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt8(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -193,7 +193,7 @@ SELECT materialize(toInt16(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt16(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDate(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt16(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt16(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt16(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -209,7 +209,7 @@ SELECT materialize(toInt32(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt32(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt32(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt32(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt32(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -225,7 +225,7 @@ SELECT materialize(toInt64(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt64(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toInt64(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toInt64(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt64(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt64(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -241,7 +241,7 @@ SELECT materialize(toUInt8(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toUInt8(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt8(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt8(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt8(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -257,7 +257,7 @@ SELECT materialize(toUInt16(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt16(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt16(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt16(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt16(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -273,7 +273,7 @@ SELECT materialize(toUInt32(0)) AS x, toUInt64(1) AS y, ((x > y) ? 
x : y) AS z, SELECT materialize(toUInt32(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt32(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt32(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt32(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -289,7 +289,7 @@ SELECT materialize(toUInt64(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt64(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUInt64(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toUInt64(0)) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt64(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt64(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -305,23 +305,23 @@ SELECT materialize(toDate(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, to SELECT materialize(toDate(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDate(0)) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt8(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toUInt64(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDateTime(1, 'Asia/Istanbul') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } +SELECT materialize(toDateTime(0, 'Asia/Istanbul')) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } diff --git a/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql b/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql index fa98d087a62..a86a863124c 100644 --- a/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql +++ b/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql @@ -1,6 +1,6 @@ -- See comment in DateLUTImpl.cpp: "We doesn't support cases when time change results in switching to previous day..." 
SELECT - ignore(toDateTime(370641600, 'Europe/Moscow') AS t), + ignore(toDateTime(370641600, 'Asia/Istanbul') AS t), replaceRegexpAll(toString(t), '\\d', 'x'), toHour(t) < 24, replaceRegexpAll(formatDateTime(t, '%Y-%m-%d %H:%M:%S; %R:%S; %F %T'), '\\d', 'x'); diff --git a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql index 4244ce2039b..1fdd9b20b2b 100644 --- a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql +++ b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql @@ -1,3 +1,3 @@ -- concat with empty string to defeat injectiveness of toString assumption. -SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Europe/Moscow') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; +SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Asia/Istanbul') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; SELECT concat('', toString(toDateTime('2018-09-19 00:00:00', 'Asia/Tehran') + INTERVAL number * 300 SECOND)) AS k FROM numbers(1000) GROUP BY k HAVING count() > 1 ORDER BY k; diff --git a/tests/queries/0_stateless/00825_protobuf_format_persons.sh b/tests/queries/0_stateless/00825_protobuf_format_persons.sh index bb376e6ed70..fe53e2995f9 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_persons.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_persons.sh @@ -28,7 +28,7 @@ CREATE TABLE persons_00825 (uuid UUID, photo Nullable(String), phoneNumber Nullable(FixedString(13)), isOnline UInt8, - visitTime Nullable(DateTime('Europe/Moscow')), + visitTime Nullable(DateTime('Asia/Istanbul')), age UInt8, zodiacSign Enum16('aries'=321, 'taurus'=420, 'gemini'=521, 'cancer'=621, 'leo'=723, 'virgo'=823, 'libra'=923, 'scorpius'=1023, 'sagittarius'=1122, 
'capricorn'=1222, 'aquarius'=120, @@ -47,9 +47,9 @@ CREATE TABLE persons_00825 (uuid UUID, "nestiness_a_B.c_E" Array(UInt32) ) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO persons_00825 VALUES (toUUID('a7522158-3d41-4b77-ad69-6c598ee55c49'), 'Ivan', 'Petrov', 'male', toDate('1980-12-29'), 'png', '+74951234567', 1, toDateTime('2019-01-05 18:45:00', 'Europe/Moscow'), 38, 'capricorn', ['Yesterday', 'Flowers'], [255, 0, 0], 'Moscow', [55.753215, 37.622504], 3.14, 214.10, 0.1, 5.8, 17060000000, ['meter', 'centimeter', 'kilometer'], [1, 0.01, 1000], 500, [501, 502]); +INSERT INTO persons_00825 VALUES (toUUID('a7522158-3d41-4b77-ad69-6c598ee55c49'), 'Ivan', 'Petrov', 'male', toDate('1980-12-29'), 'png', '+74951234567', 1, toDateTime('2019-01-05 18:45:00', 'Asia/Istanbul'), 38, 'capricorn', ['Yesterday', 'Flowers'], [255, 0, 0], 'Moscow', [55.753215, 37.622504], 3.14, 214.10, 0.1, 5.8, 17060000000, ['meter', 'centimeter', 'kilometer'], [1, 0.01, 1000], 500, [501, 502]); INSERT INTO persons_00825 VALUES (toUUID('c694ad8a-f714-4ea3-907d-fd54fb25d9b5'), 'Natalia', 'Sokolova', 'female', toDate('1992-03-08'), 'jpg', NULL, 0, NULL, 26, 'pisces', [], [100, 200, 50], 'Plymouth', [50.403724, -4.142123], 3.14159, NULL, 0.007, 5.4, -20000000000000, [], [], NULL, []); -INSERT INTO persons_00825 VALUES (toUUID('a7da1aa6-f425-4789-8947-b034786ed374'), 'Vasily', 'Sidorov', 'male', toDate('1995-07-28'), 'bmp', '+442012345678', 1, toDateTime('2018-12-30 00:00:00', 'Europe/Moscow'), 23, 'leo', ['Sunny'], [250, 244, 10], 'Murmansk', [68.970682, 33.074981], 3.14159265358979, 100000000000, 800, -3.2, 154400000, ['pound'], [16], 503, []); +INSERT INTO persons_00825 VALUES (toUUID('a7da1aa6-f425-4789-8947-b034786ed374'), 'Vasily', 'Sidorov', 'male', toDate('1995-07-28'), 'bmp', '+442012345678', 1, toDateTime('2018-12-30 00:00:00', 'Asia/Istanbul'), 23, 'leo', ['Sunny'], [250, 244, 10], 'Murmansk', [68.970682, 33.074981], 3.14159265358979, 100000000000, 800, -3.2, 154400000, ['pound'], [16], 
503, []); SELECT * FROM persons_00825 ORDER BY name; EOF diff --git a/tests/queries/0_stateless/00835_if_generic_case.sql b/tests/queries/0_stateless/00835_if_generic_case.sql index 011cea46ffc..63baffcf17d 100644 --- a/tests/queries/0_stateless/00835_if_generic_case.sql +++ b/tests/queries/0_stateless/00835_if_generic_case.sql @@ -1,17 +1,17 @@ -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, x > y ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, materialize(toDate('2000-01-02')) AS y, x > y ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, 0 ? 
x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, 0 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, materialize(toDate('2000-01-02')) AS y, 0 ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; -SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; -SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, toDate('2000-01-02') AS y, 1 ? x : y AS z; +SELECT toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul') AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; +SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Asia/Istanbul')) AS x, materialize(toDate('2000-01-02')) AS y, 1 ? x : y AS z; SELECT rand() % 2 = 0 ? 
number : number FROM numbers(5); diff --git a/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql b/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql index cf0e0bac3dd..c6151d3bdee 100644 --- a/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql +++ b/tests/queries/0_stateless/00910_decimal_group_array_crash_3783.sql @@ -8,7 +8,7 @@ select groupArray(s) from (select sum(n) s from (select toDecimal128(number, 10) DROP TABLE IF EXISTS sensor_value; CREATE TABLE sensor_value ( - received_at DateTime('Europe/Moscow'), + received_at DateTime('Asia/Istanbul'), device_id UUID, sensor_id UUID, value Nullable(Decimal(18, 4)), @@ -35,28 +35,28 @@ DROP TABLE sensor_value; select s.a, s.b, max(s.dt1) dt1, s.c, s.d, s.f, s.i, max(s.dt2) dt2 from ( select toUInt64(4360430) a , toUInt64(5681495) b - , toDateTime('2018-11-01 10:44:58', 'Europe/Moscow') dt1 + , toDateTime('2018-11-01 10:44:58', 'Asia/Istanbul') dt1 , 'txt' c , toDecimal128('274.350000000000', 12) d , toDecimal128(268.970000000000, 12) f , toDecimal128(0.000000000000, 12) i - , toDateTime('2018-11-02 00:00:00', 'Europe/Moscow') dt2 + , toDateTime('2018-11-02 00:00:00', 'Asia/Istanbul') dt2 union all select toUInt64(4341757) a , toUInt64(5657967) b - , toDateTime('2018-11-01 16:47:46', 'Europe/Moscow') dt1 + , toDateTime('2018-11-01 16:47:46', 'Asia/Istanbul') dt1 , 'txt' c , toDecimal128('321.380000000000', 12) d , toDecimal128(315.080000000000, 12) f , toDecimal128(0.000000000000, 12) i - , toDateTime('2018-11-02 00:00:00', 'Europe/Moscow') dt2 + , toDateTime('2018-11-02 00:00:00', 'Asia/Istanbul') dt2 union all select toUInt64(4360430) a , toUInt64(5681495) b - , toDateTime('2018-11-02 09:00:07', 'Europe/Moscow') dt1 + , toDateTime('2018-11-02 09:00:07', 'Asia/Istanbul') dt1 , 'txt' c , toDecimal128('274.350000000000', 12) d , toDecimal128(268.970000000000, 12) f , toDecimal128(0.000000000000, 12) i - , toDateTime('2018-11-02 00:00:00', 'Europe/Moscow') dt2 + , 
toDateTime('2018-11-02 00:00:00', 'Asia/Istanbul') dt2 ) s group by s.a, s.b, s.c, s.d, s.f, s.i ORDER BY s.a, s.b, s.c, s.d, s.f, s.i; diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python index 03cc088fd87..e4dbc716d7e 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python @@ -9,69 +9,69 @@ import argparse # Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime. FUNCTIONS=""" toTimeZone(N, 'UTC') -toYear(N, 'Europe/Moscow') -toQuarter(N, 'Europe/Moscow') -toMonth(N, 'Europe/Moscow') -toDayOfYear(N, 'Europe/Moscow') -toDayOfMonth(N, 'Europe/Moscow') -toDayOfWeek(N, 'Europe/Moscow') -toHour(N, 'Europe/Moscow') -toMinute(N, 'Europe/Moscow') -toSecond(N, 'Europe/Moscow') +toYear(N, 'Asia/Istanbul') +toQuarter(N, 'Asia/Istanbul') +toMonth(N, 'Asia/Istanbul') +toDayOfYear(N, 'Asia/Istanbul') +toDayOfMonth(N, 'Asia/Istanbul') +toDayOfWeek(N, 'Asia/Istanbul') +toHour(N, 'Asia/Istanbul') +toMinute(N, 'Asia/Istanbul') +toSecond(N, 'Asia/Istanbul') toUnixTimestamp(N) -toStartOfYear(N, 'Europe/Moscow') -toStartOfISOYear(N, 'Europe/Moscow') -toStartOfQuarter(N, 'Europe/Moscow') -toStartOfMonth(N, 'Europe/Moscow') -toMonday(N, 'Europe/Moscow') -toStartOfWeek(N, 'Europe/Moscow') -toStartOfDay(N, 'Europe/Moscow') -toStartOfHour(N, 'Europe/Moscow') -toStartOfMinute(N, 'Europe/Moscow') -toStartOfFiveMinute(N, 'Europe/Moscow') -toStartOfTenMinutes(N, 'Europe/Moscow') -toStartOfFifteenMinutes(N, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 1 year, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 1 month, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 1 day, 'Europe/Moscow') -toStartOfInterval(N, INTERVAL 15 minute, 'Europe/Moscow') -date_trunc('year', N, 'Europe/Moscow') -date_trunc('month', N, 'Europe/Moscow') -date_trunc('day', N, 
'Europe/Moscow') -date_trunc('minute', N, 'Europe/Moscow') -toTime(N, 'Europe/Moscow') -toRelativeYearNum(N, 'Europe/Moscow') -toRelativeQuarterNum(N, 'Europe/Moscow') -toRelativeMonthNum(N, 'Europe/Moscow') -toRelativeWeekNum(N, 'Europe/Moscow') -toRelativeDayNum(N, 'Europe/Moscow') -toRelativeHourNum(N, 'Europe/Moscow') -toRelativeMinuteNum(N, 'Europe/Moscow') -toRelativeSecondNum(N, 'Europe/Moscow') -toISOYear(N, 'Europe/Moscow') -toISOWeek(N, 'Europe/Moscow') -toWeek(N, 'Europe/Moscow') -toYearWeek(N, 'Europe/Moscow') -timeSlot(N, 'Europe/Moscow') -toYYYYMM(N, 'Europe/Moscow') -toYYYYMMDD(N, 'Europe/Moscow') -toYYYYMMDDhhmmss(N, 'Europe/Moscow') -addYears(N, 1, 'Europe/Moscow') -addMonths(N, 1, 'Europe/Moscow') -addWeeks(N, 1, 'Europe/Moscow') -addDays(N, 1, 'Europe/Moscow') -addHours(N, 1, 'Europe/Moscow') -addMinutes(N, 1, 'Europe/Moscow') -addSeconds(N, 1, 'Europe/Moscow') -addQuarters(N, 1, 'Europe/Moscow') -subtractYears(N, 1, 'Europe/Moscow') -subtractMonths(N, 1, 'Europe/Moscow') -subtractWeeks(N, 1, 'Europe/Moscow') -subtractDays(N, 1, 'Europe/Moscow') -subtractHours(N, 1, 'Europe/Moscow') -subtractMinutes(N, 1, 'Europe/Moscow') -subtractSeconds(N, 1, 'Europe/Moscow') -subtractQuarters(N, 1, 'Europe/Moscow') +toStartOfYear(N, 'Asia/Istanbul') +toStartOfISOYear(N, 'Asia/Istanbul') +toStartOfQuarter(N, 'Asia/Istanbul') +toStartOfMonth(N, 'Asia/Istanbul') +toMonday(N, 'Asia/Istanbul') +toStartOfWeek(N, 'Asia/Istanbul') +toStartOfDay(N, 'Asia/Istanbul') +toStartOfHour(N, 'Asia/Istanbul') +toStartOfMinute(N, 'Asia/Istanbul') +toStartOfFiveMinute(N, 'Asia/Istanbul') +toStartOfTenMinutes(N, 'Asia/Istanbul') +toStartOfFifteenMinutes(N, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 1 year, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 1 month, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 1 day, 'Asia/Istanbul') +toStartOfInterval(N, INTERVAL 15 minute, 'Asia/Istanbul') +date_trunc('year', N, 'Asia/Istanbul') +date_trunc('month', N, 'Asia/Istanbul') 
+date_trunc('day', N, 'Asia/Istanbul') +date_trunc('minute', N, 'Asia/Istanbul') +toTime(N, 'Asia/Istanbul') +toRelativeYearNum(N, 'Asia/Istanbul') +toRelativeQuarterNum(N, 'Asia/Istanbul') +toRelativeMonthNum(N, 'Asia/Istanbul') +toRelativeWeekNum(N, 'Asia/Istanbul') +toRelativeDayNum(N, 'Asia/Istanbul') +toRelativeHourNum(N, 'Asia/Istanbul') +toRelativeMinuteNum(N, 'Asia/Istanbul') +toRelativeSecondNum(N, 'Asia/Istanbul') +toISOYear(N, 'Asia/Istanbul') +toISOWeek(N, 'Asia/Istanbul') +toWeek(N, 'Asia/Istanbul') +toYearWeek(N, 'Asia/Istanbul') +timeSlot(N, 'Asia/Istanbul') +toYYYYMM(N, 'Asia/Istanbul') +toYYYYMMDD(N, 'Asia/Istanbul') +toYYYYMMDDhhmmss(N, 'Asia/Istanbul') +addYears(N, 1, 'Asia/Istanbul') +addMonths(N, 1, 'Asia/Istanbul') +addWeeks(N, 1, 'Asia/Istanbul') +addDays(N, 1, 'Asia/Istanbul') +addHours(N, 1, 'Asia/Istanbul') +addMinutes(N, 1, 'Asia/Istanbul') +addSeconds(N, 1, 'Asia/Istanbul') +addQuarters(N, 1, 'Asia/Istanbul') +subtractYears(N, 1, 'Asia/Istanbul') +subtractMonths(N, 1, 'Asia/Istanbul') +subtractWeeks(N, 1, 'Asia/Istanbul') +subtractDays(N, 1, 'Asia/Istanbul') +subtractHours(N, 1, 'Asia/Istanbul') +subtractMinutes(N, 1, 'Asia/Istanbul') +subtractSeconds(N, 1, 'Asia/Istanbul') +subtractQuarters(N, 1, 'Asia/Istanbul') CAST(N as DateTime('Europe/Minsk')) CAST(N as Date) CAST(N as UInt64) @@ -80,10 +80,10 @@ CAST(N as DateTime64(3, 'Europe/Minsk')) CAST(N as DateTime64(6, 'Europe/Minsk')) CAST(N as DateTime64(9, 'Europe/Minsk')) # Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer. 
-# CAST(N as DateTime64(12, 'Europe/Moscow')) +# CAST(N as DateTime64(12, 'Asia/Istanbul')) # DateTime64(18) will always fail due to zero precision, but it is Ok to test here: -# CAST(N as DateTime64(18, 'Europe/Moscow')) -formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Europe/Moscow') +# CAST(N as DateTime64(18, 'Asia/Istanbul')) +formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Asia/Istanbul') """.splitlines() # Expanded later to cartesian product of all arguments, using format string. diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 230bfa0c117..fa21fae0121 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -3,47 +3,47 @@ Code: 43 "DateTime('UTC')","2019-09-16 16:20:11" "DateTime64(3, 'UTC')","2019-09-16 16:20:11.234" ------------------------------------------ -SELECT toYear(N, \'Europe/Moscow\') +SELECT toYear(N, \'Asia/Istanbul\') "UInt16",2019 "UInt16",2019 "UInt16",2019 ------------------------------------------ -SELECT toQuarter(N, \'Europe/Moscow\') +SELECT toQuarter(N, \'Asia/Istanbul\') "UInt8",3 "UInt8",3 "UInt8",3 ------------------------------------------ -SELECT toMonth(N, \'Europe/Moscow\') +SELECT toMonth(N, \'Asia/Istanbul\') "UInt8",9 "UInt8",9 "UInt8",9 ------------------------------------------ -SELECT toDayOfYear(N, \'Europe/Moscow\') +SELECT toDayOfYear(N, \'Asia/Istanbul\') "UInt16",259 "UInt16",259 "UInt16",259 ------------------------------------------ -SELECT toDayOfMonth(N, \'Europe/Moscow\') +SELECT toDayOfMonth(N, \'Asia/Istanbul\') "UInt8",16 "UInt8",16 "UInt8",16 ------------------------------------------ -SELECT toDayOfWeek(N, \'Europe/Moscow\') +SELECT toDayOfWeek(N, \'Asia/Istanbul\') "UInt8",1 "UInt8",1 "UInt8",1 
------------------------------------------ -SELECT toHour(N, \'Europe/Moscow\') +SELECT toHour(N, \'Asia/Istanbul\') Code: 43 "UInt8",19 "UInt8",19 ------------------------------------------ -SELECT toMinute(N, \'Europe/Moscow\') +SELECT toMinute(N, \'Asia/Istanbul\') Code: 43 "UInt8",20 "UInt8",20 ------------------------------------------ -SELECT toSecond(N, \'Europe/Moscow\') +SELECT toSecond(N, \'Asia/Istanbul\') Code: 43 "UInt8",11 "UInt8",11 @@ -53,269 +53,269 @@ Code: 44 "UInt32",1568650811 "UInt32",1568650811 ------------------------------------------ -SELECT toStartOfYear(N, \'Europe/Moscow\') +SELECT toStartOfYear(N, \'Asia/Istanbul\') Code: 43 "Date","2019-01-01" "Date","2019-01-01" ------------------------------------------ -SELECT toStartOfISOYear(N, \'Europe/Moscow\') +SELECT toStartOfISOYear(N, \'Asia/Istanbul\') Code: 43 "Date","2018-12-31" "Date","2018-12-31" ------------------------------------------ -SELECT toStartOfQuarter(N, \'Europe/Moscow\') +SELECT toStartOfQuarter(N, \'Asia/Istanbul\') Code: 43 "Date","2019-07-01" "Date","2019-07-01" ------------------------------------------ -SELECT toStartOfMonth(N, \'Europe/Moscow\') +SELECT toStartOfMonth(N, \'Asia/Istanbul\') Code: 43 "Date","2019-09-01" "Date","2019-09-01" ------------------------------------------ -SELECT toMonday(N, \'Europe/Moscow\') +SELECT toMonday(N, \'Asia/Istanbul\') Code: 43 "Date","2019-09-16" "Date","2019-09-16" ------------------------------------------ -SELECT toStartOfWeek(N, \'Europe/Moscow\') +SELECT toStartOfWeek(N, \'Asia/Istanbul\') Code: 43 Code: 43 Code: 43 ------------------------------------------ -SELECT toStartOfDay(N, \'Europe/Moscow\') -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" +SELECT toStartOfDay(N, \'Asia/Istanbul\') +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" 
+"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" ------------------------------------------ -SELECT toStartOfHour(N, \'Europe/Moscow\') +SELECT toStartOfHour(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" ------------------------------------------ -SELECT toStartOfMinute(N, \'Europe/Moscow\') +SELECT toStartOfMinute(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toStartOfFiveMinute(N, \'Europe/Moscow\') +SELECT toStartOfFiveMinute(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toStartOfTenMinutes(N, \'Europe/Moscow\') +SELECT toStartOfTenMinutes(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toStartOfFifteenMinutes(N, \'Europe/Moscow\') +SELECT toStartOfFifteenMinutes(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 1 year, \'Europe/Moscow\') +SELECT toStartOfInterval(N, INTERVAL 1 year, \'Asia/Istanbul\') Code: 
43 "Date","2019-01-01" "Date","2019-01-01" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 1 month, \'Europe/Moscow\') +SELECT toStartOfInterval(N, INTERVAL 1 month, \'Asia/Istanbul\') Code: 43 "Date","2019-09-01" "Date","2019-09-01" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 1 day, \'Europe/Moscow\') -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" +SELECT toStartOfInterval(N, INTERVAL 1 day, \'Asia/Istanbul\') +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" ------------------------------------------ -SELECT toStartOfInterval(N, INTERVAL 15 minute, \'Europe/Moscow\') +SELECT toStartOfInterval(N, INTERVAL 15 minute, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" -"DateTime('Europe/Moscow')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:15:00" ------------------------------------------ -SELECT date_trunc(\'year\', N, \'Europe/Moscow\') +SELECT date_trunc(\'year\', N, \'Asia/Istanbul\') Code: 43 "Date","2019-01-01" "Date","2019-01-01" ------------------------------------------ -SELECT date_trunc(\'month\', N, \'Europe/Moscow\') +SELECT date_trunc(\'month\', N, \'Asia/Istanbul\') Code: 43 "Date","2019-09-01" "Date","2019-09-01" ------------------------------------------ -SELECT date_trunc(\'day\', N, \'Europe/Moscow\') -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" -"DateTime('Europe/Moscow')","2019-09-16 00:00:00" +SELECT date_trunc(\'day\', N, \'Asia/Istanbul\') +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 00:00:00" 
------------------------------------------ -SELECT date_trunc(\'minute\', N, \'Europe/Moscow\') +SELECT date_trunc(\'minute\', N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" -"DateTime('Europe/Moscow')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:00" ------------------------------------------ -SELECT toTime(N, \'Europe/Moscow\') +SELECT toTime(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","1970-01-02 19:20:11" -"DateTime('Europe/Moscow')","1970-01-02 19:20:11" +"DateTime('Asia/Istanbul')","1970-01-02 19:20:11" +"DateTime('Asia/Istanbul')","1970-01-02 19:20:11" ------------------------------------------ -SELECT toRelativeYearNum(N, \'Europe/Moscow\') +SELECT toRelativeYearNum(N, \'Asia/Istanbul\') "UInt16",2019 "UInt16",2019 "UInt16",2019 ------------------------------------------ -SELECT toRelativeQuarterNum(N, \'Europe/Moscow\') +SELECT toRelativeQuarterNum(N, \'Asia/Istanbul\') "UInt32",8078 "UInt32",8078 "UInt32",8078 ------------------------------------------ -SELECT toRelativeMonthNum(N, \'Europe/Moscow\') +SELECT toRelativeMonthNum(N, \'Asia/Istanbul\') "UInt32",24237 "UInt32",24237 "UInt32",24237 ------------------------------------------ -SELECT toRelativeWeekNum(N, \'Europe/Moscow\') +SELECT toRelativeWeekNum(N, \'Asia/Istanbul\') "UInt32",2594 "UInt32",2594 "UInt32",2594 ------------------------------------------ -SELECT toRelativeDayNum(N, \'Europe/Moscow\') +SELECT toRelativeDayNum(N, \'Asia/Istanbul\') "UInt32",18155 "UInt32",18155 "UInt32",18155 ------------------------------------------ -SELECT toRelativeHourNum(N, \'Europe/Moscow\') +SELECT toRelativeHourNum(N, \'Asia/Istanbul\') "UInt32",435717 "UInt32",435736 "UInt32",435736 ------------------------------------------ -SELECT toRelativeMinuteNum(N, \'Europe/Moscow\') +SELECT toRelativeMinuteNum(N, \'Asia/Istanbul\') "UInt32",26143020 "UInt32",26144180 "UInt32",26144180 
------------------------------------------ -SELECT toRelativeSecondNum(N, \'Europe/Moscow\') +SELECT toRelativeSecondNum(N, \'Asia/Istanbul\') "UInt32",1568581200 "UInt32",1568650811 "UInt32",1568650811 ------------------------------------------ -SELECT toISOYear(N, \'Europe/Moscow\') +SELECT toISOYear(N, \'Asia/Istanbul\') "UInt16",2019 "UInt16",2019 "UInt16",2019 ------------------------------------------ -SELECT toISOWeek(N, \'Europe/Moscow\') +SELECT toISOWeek(N, \'Asia/Istanbul\') "UInt8",38 "UInt8",38 "UInt8",38 ------------------------------------------ -SELECT toWeek(N, \'Europe/Moscow\') +SELECT toWeek(N, \'Asia/Istanbul\') Code: 43 Code: 43 Code: 43 ------------------------------------------ -SELECT toYearWeek(N, \'Europe/Moscow\') +SELECT toYearWeek(N, \'Asia/Istanbul\') Code: 43 Code: 43 Code: 43 ------------------------------------------ -SELECT timeSlot(N, \'Europe/Moscow\') +SELECT timeSlot(N, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" -"DateTime('Europe/Moscow')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" +"DateTime('Asia/Istanbul')","2019-09-16 19:00:00" ------------------------------------------ -SELECT toYYYYMM(N, \'Europe/Moscow\') +SELECT toYYYYMM(N, \'Asia/Istanbul\') "UInt32",201909 "UInt32",201909 "UInt32",201909 ------------------------------------------ -SELECT toYYYYMMDD(N, \'Europe/Moscow\') +SELECT toYYYYMMDD(N, \'Asia/Istanbul\') "UInt32",20190916 "UInt32",20190916 "UInt32",20190916 ------------------------------------------ -SELECT toYYYYMMDDhhmmss(N, \'Europe/Moscow\') +SELECT toYYYYMMDDhhmmss(N, \'Asia/Istanbul\') "UInt64",20190916000000 "UInt64",20190916192011 "UInt64",20190916192011 ------------------------------------------ -SELECT addYears(N, 1, \'Europe/Moscow\') +SELECT addYears(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2020-09-16 19:20:11" +"DateTime('Asia/Istanbul')","2020-09-16 19:20:11" Code: 43 
------------------------------------------ -SELECT addMonths(N, 1, \'Europe/Moscow\') +SELECT addMonths(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-10-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-10-16 19:20:11" Code: 43 ------------------------------------------ -SELECT addWeeks(N, 1, \'Europe/Moscow\') +SELECT addWeeks(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-23 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-23 19:20:11" Code: 43 ------------------------------------------ -SELECT addDays(N, 1, \'Europe/Moscow\') +SELECT addDays(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-17 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-17 19:20:11" Code: 43 ------------------------------------------ -SELECT addHours(N, 1, \'Europe/Moscow\') +SELECT addHours(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 20:20:11" +"DateTime('Asia/Istanbul')","2019-09-16 20:20:11" Code: 43 ------------------------------------------ -SELECT addMinutes(N, 1, \'Europe/Moscow\') +SELECT addMinutes(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:21:11" +"DateTime('Asia/Istanbul')","2019-09-16 19:21:11" Code: 43 ------------------------------------------ -SELECT addSeconds(N, 1, \'Europe/Moscow\') +SELECT addSeconds(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:12" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:12" Code: 43 ------------------------------------------ -SELECT addQuarters(N, 1, \'Europe/Moscow\') +SELECT addQuarters(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-12-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-12-16 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractYears(N, 1, \'Europe/Moscow\') +SELECT subtractYears(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2018-09-16 19:20:11" +"DateTime('Asia/Istanbul')","2018-09-16 19:20:11" Code: 43 
------------------------------------------ -SELECT subtractMonths(N, 1, \'Europe/Moscow\') +SELECT subtractMonths(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-08-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-08-16 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractWeeks(N, 1, \'Europe/Moscow\') +SELECT subtractWeeks(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-09 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-09 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractDays(N, 1, \'Europe/Moscow\') +SELECT subtractDays(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-15 19:20:11" +"DateTime('Asia/Istanbul')","2019-09-15 19:20:11" Code: 43 ------------------------------------------ -SELECT subtractHours(N, 1, \'Europe/Moscow\') +SELECT subtractHours(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 18:20:11" +"DateTime('Asia/Istanbul')","2019-09-16 18:20:11" Code: 43 ------------------------------------------ -SELECT subtractMinutes(N, 1, \'Europe/Moscow\') +SELECT subtractMinutes(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:19:11" +"DateTime('Asia/Istanbul')","2019-09-16 19:19:11" Code: 43 ------------------------------------------ -SELECT subtractSeconds(N, 1, \'Europe/Moscow\') +SELECT subtractSeconds(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-09-16 19:20:10" +"DateTime('Asia/Istanbul')","2019-09-16 19:20:10" Code: 43 ------------------------------------------ -SELECT subtractQuarters(N, 1, \'Europe/Moscow\') +SELECT subtractQuarters(N, 1, \'Asia/Istanbul\') Code: 43 -"DateTime('Europe/Moscow')","2019-06-16 19:20:11" +"DateTime('Asia/Istanbul')","2019-06-16 19:20:11" Code: 43 ------------------------------------------ SELECT CAST(N as DateTime(\'Europe/Minsk\')) @@ -353,7 +353,7 @@ SELECT CAST(N as DateTime64(9, \'Europe/Minsk\')) "DateTime64(9, 
'Europe/Minsk')","2019-09-16 19:20:11.000000000" "DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.234000000" ------------------------------------------ -SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\', \'Europe/Moscow\') +SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\', \'Asia/Istanbul\') "String","20 16 09/16/19 16 2019-09-16 00 12 259 09 00 AM 00:00 00 00:00:00 1 38 1 19 2019 %" "String","20 16 09/16/19 16 2019-09-16 19 07 259 09 20 PM 19:20 11 19:20:11 1 38 1 19 2019 %" "String","20 16 09/16/19 16 2019-09-16 19 07 259 09 20 PM 19:20 11 19:20:11 1 38 1 19 2019 %" diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sh b/tests/queries/0_stateless/00927_asof_join_other_types.sh index 0c17ca2085f..10173a3e43f 100755 --- a/tests/queries/0_stateless/00927_asof_join_other_types.sh +++ b/tests/queries/0_stateless/00927_asof_join_other_types.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Europe/Moscow')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Europe/Moscow')" +for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')" do $CLICKHOUSE_CLIENT -mn <= 5 LIMIT 15); -INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); +CREATE TABLE bloom_filter_array_types_test (order_key Array(UInt64), i8 Array(Int8), i16 Array(Int16), i32 Array(Int32), i64 Array(Int64), u8 Array(UInt8), u16 Array(UInt16), u32 Array(UInt32), u64 Array(UInt64), f32 Array(Float32), f64 Array(Float64), date Array(Date), date_time Array(DateTime('Asia/Istanbul')), str Array(String), fixed_string Array(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, 
groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); +INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); +INSERT INTO bloom_filter_array_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(i8, 1); SELECT COUNT() FROM bloom_filter_array_types_test 
WHERE has(i16, 1); @@ -68,7 +68,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -83,7 +83,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -98,7 +98,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, 
toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -106,8 +106,8 @@ DROP TABLE IF EXISTS bloom_filter_array_types_test; DROP TABLE IF EXISTS bloom_filter_null_types_test; -CREATE TABLE bloom_filter_null_types_test (order_key UInt64, i8 Nullable(Int8), i16 Nullable(Int16), i32 Nullable(Int32), i64 Nullable(Int64), u8 Nullable(UInt8), u16 Nullable(UInt16), u32 Nullable(UInt32), u64 Nullable(UInt64), f32 Nullable(Float32), f64 Nullable(Float64), date Nullable(Date), date_time Nullable(DateTime('Europe/Moscow')), str Nullable(String), fixed_string Nullable(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; -INSERT INTO bloom_filter_null_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; +CREATE TABLE bloom_filter_null_types_test (order_key UInt64, i8 Nullable(Int8), i16 Nullable(Int16), i32 Nullable(Int32), i64 Nullable(Int64), u8 Nullable(UInt8), u16 Nullable(UInt16), u32 Nullable(UInt32), u64 Nullable(UInt64), f32 Nullable(Float32), f64 Nullable(Float64), date Nullable(Date), date_time Nullable(DateTime('Asia/Istanbul')), str Nullable(String), fixed_string Nullable(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) 
ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO bloom_filter_null_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Asia/Istanbul') AS date, toDateTime(number, 'Asia/Istanbul') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; INSERT INTO bloom_filter_null_types_test SELECT 0 AS order_key, NULL AS i8, NULL AS i16, NULL AS i32, NULL AS i64, NULL AS u8, NULL AS u16, NULL AS u32, NULL AS u64, NULL AS f32, NULL AS f64, NULL AS date, NULL AS date_time, NULL AS str, NULL AS fixed_string; SELECT COUNT() FROM bloom_filter_null_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; @@ -121,7 +121,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE u64 = 1 SETTINGS max_rows SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; @@ -177,7 +177,7 @@ CREATE TABLE bloom_filter_array_lc_null_types_test ( f64 Array(LowCardinality(Nullable(Float64))), date 
Array(LowCardinality(Nullable(Date))), - date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), + date_time Array(LowCardinality(Nullable(DateTime('Asia/Istanbul')))), str Array(LowCardinality(Nullable(String))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))), @@ -197,16 +197,16 @@ SELECT groupArray(number) AS order_key, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); -INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); -INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, 
groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); +INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); +INSERT INTO bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Asia/Istanbul')) AS date, groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); INSERT INTO bloom_filter_array_lc_null_types_test SELECT n AS order_key, n AS i8, n AS i16, n AS i32, n AS i64, n AS u8, n AS u16, n AS u32, n AS u64, n AS 
f32, n AS f64, n AS date, n AS date_time, n AS str, n AS fixed_string FROM (SELECT [NULL] AS n); -INSERT INTO bloom_filter_array_lc_null_types_test SELECT [NULL, n] AS order_key, [NULL, toInt8(n)] AS i8, [NULL, toInt16(n)] AS i16, [NULL, toInt32(n)] AS i32, [NULL, toInt64(n)] AS i64, [NULL, toUInt8(n)] AS u8, [NULL, toUInt16(n)] AS u16, [NULL, toUInt32(n)] AS u32, [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, [NULL, toDate(n, 'Europe/Moscow')] AS date, [NULL, toDateTime(n, 'Europe/Moscow')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); +INSERT INTO bloom_filter_array_lc_null_types_test SELECT [NULL, n] AS order_key, [NULL, toInt8(n)] AS i8, [NULL, toInt16(n)] AS i16, [NULL, toInt32(n)] AS i32, [NULL, toInt64(n)] AS i64, [NULL, toUInt8(n)] AS u8, [NULL, toUInt16(n)] AS u16, [NULL, toUInt32(n)] AS u32, [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, [NULL, toDate(n, 'Asia/Istanbul')] AS date, [NULL, toDateTime(n, 'Asia/Istanbul')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(i8, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(i16, 1); @@ -219,7 +219,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); SELECT COUNT() 
FROM bloom_filter_array_lc_null_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -234,7 +234,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -249,7 +249,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -279,7 +279,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 100); SELECT COUNT() FROM 
bloom_filter_array_lc_null_types_test WHERE has(f64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-04-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Europe/Moscow')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '100'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5)); diff --git a/tests/queries/0_stateless/01077_mutations_index_consistency.sh b/tests/queries/0_stateless/01077_mutations_index_consistency.sh index 31086ed6784..c41eab62ecb 100755 --- a/tests/queries/0_stateless/01077_mutations_index_consistency.sh +++ b/tests/queries/0_stateless/01077_mutations_index_consistency.sh @@ -7,9 +7,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement" -$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Europe/Moscow')) Engine = MergeTree ORDER BY (toStartOfHour(date));" +$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Asia/Istanbul')) Engine = MergeTree ORDER BY (toStartOfHour(date));" -$CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Europe/Moscow') + number%(23*3600) from numbers(1000000);" +$CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00', 'Asia/Istanbul') + number%(23*3600) from numbers(1000000);" $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE movement FINAL" @@ -18,20 +18,20 @@ SELECT count(), toStartOfHour(date) AS Hour FROM movement -WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Europe/Moscow')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Europe/Moscow')) +WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Asia/Istanbul')) AND (date <= 
toDateTime('2020-01-22T23:00:00', 'Asia/Istanbul')) GROUP BY Hour ORDER BY Hour DESC " | grep "16:00:00" | cut -f1 -$CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00', 'Europe/Moscow') and date < toDateTime('2020-01-22T17:00:00', 'Europe/Moscow') SETTINGS mutations_sync = 2" +$CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00', 'Asia/Istanbul') and date < toDateTime('2020-01-22T17:00:00', 'Asia/Istanbul') SETTINGS mutations_sync = 2" $CLICKHOUSE_CLIENT -n --query " SELECT count(), toStartOfHour(date) AS Hour FROM movement -WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Europe/Moscow')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Europe/Moscow')) +WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Asia/Istanbul')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Asia/Istanbul')) GROUP BY Hour ORDER BY Hour DESC " | grep "16:00:00" | wc -l @@ -42,7 +42,7 @@ SELECT count(), toStartOfHour(date) AS Hour FROM movement -WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Europe/Moscow')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Europe/Moscow')) +WHERE (date >= toDateTime('2020-01-22T10:00:00', 'Asia/Istanbul')) AND (date <= toDateTime('2020-01-22T23:00:00', 'Asia/Istanbul')) GROUP BY Hour ORDER BY Hour DESC " | grep "22:00:00" | cut -f1 diff --git a/tests/queries/0_stateless/01087_storage_generate.sql b/tests/queries/0_stateless/01087_storage_generate.sql index a16ad55832c..7df9f3931d0 100644 --- a/tests/queries/0_stateless/01087_storage_generate.sql +++ b/tests/queries/0_stateless/01087_storage_generate.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Europe/Moscow'), UUID)) ENGINE=GenerateRandom(10, 5, 3); +CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Asia/Istanbul'), UUID)) 
ENGINE=GenerateRandom(10, 5, 3); SELECT * FROM test_table_2 LIMIT 100; diff --git a/tests/queries/0_stateless/01087_table_function_generate.reference b/tests/queries/0_stateless/01087_table_function_generate.reference index ead4e97403b..ef7eac41ca2 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/tests/queries/0_stateless/01087_table_function_generate.reference @@ -46,7 +46,7 @@ h \N o - -Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') 2113-06-12 2050-12-17 02:46:35 2096-02-16 22:18:22 2141-08-09 2013-10-17 23:35:26 1976-01-24 12:52:48 2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50 @@ -58,7 +58,7 @@ Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') 2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39 2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22 - -DateTime64(3, \'Europe/Moscow\') DateTime64(6, \'Europe/Moscow\') DateTime64(6, \'Europe/Moscow\') +DateTime64(3, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') 1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215 1978-08-25 17:07:25.427 2034-05-02 20:49:42.148578 2015-08-26 15:26:31.783160 2037-04-04 10:50:56.898 2055-05-28 11:12:48.819271 2068-12-26 09:58:49.635722 diff --git a/tests/queries/0_stateless/01087_table_function_generate.sql b/tests/queries/0_stateless/01087_table_function_generate.sql index ef4311649f7..512121b7ecc 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/tests/queries/0_stateless/01087_table_function_generate.sql @@ -42,20 +42,20 @@ LIMIT 10; SELECT '-'; SELECT toTypeName(d), toTypeName(dt), toTypeName(dtm) -FROM generateRandom('d Date, dt DateTime(\'Europe/Moscow\'), dtm DateTime(\'Europe/Moscow\')') +FROM generateRandom('d Date, dt DateTime(\'Asia/Istanbul\'), dtm DateTime(\'Asia/Istanbul\')') LIMIT 1; SELECT d, dt, dtm -FROM generateRandom('d Date, dt 
DateTime(\'Europe/Moscow\'), dtm DateTime(\'Europe/Moscow\')', 1, 10, 10) +FROM generateRandom('d Date, dt DateTime(\'Asia/Istanbul\'), dtm DateTime(\'Asia/Istanbul\')', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT toTypeName(dt64), toTypeName(dts64), toTypeName(dtms64) -FROM generateRandom('dt64 DateTime64(3, \'Europe/Moscow\'), dts64 DateTime64(6, \'Europe/Moscow\'), dtms64 DateTime64(6 ,\'Europe/Moscow\')') +FROM generateRandom('dt64 DateTime64(3, \'Asia/Istanbul\'), dts64 DateTime64(6, \'Asia/Istanbul\'), dtms64 DateTime64(6 ,\'Asia/Istanbul\')') LIMIT 1; SELECT dt64, dts64, dtms64 -FROM generateRandom('dt64 DateTime64(3, \'Europe/Moscow\'), dts64 DateTime64(6, \'Europe/Moscow\'), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 1, 10, 10) +FROM generateRandom('dt64 DateTime64(3, \'Asia/Istanbul\'), dts64 DateTime64(6, \'Asia/Istanbul\'), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 1, 10, 10) LIMIT 10; SELECT toTypeName(d32) @@ -176,8 +176,8 @@ FROM generateRandom('i String', 1, 10, 10) LIMIT 10; SELECT '-'; DROP TABLE IF EXISTS test_table; -CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Europe/Moscow'), UUID)) ENGINE=Memory; -INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, \'Europe/Moscow\'), UUID)', 1, 10, 2) +CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'Asia/Istanbul'), UUID)) ENGINE=Memory; +INSERT INTO test_table SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, \'Asia/Istanbul\'), UUID)', 1, 10, 2) LIMIT 10; SELECT * FROM test_table ORDER BY a, d, c; @@ -187,8 +187,8 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime('Europe/Moscow'), DateTime64(3, 'Europe/Moscow'), UUID), h FixedString(2)) ENGINE=Memory; -INSERT 
INTO test_table_2 SELECT * FROM generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime(\'Europe/Moscow\'), DateTime64(3, \'Europe/Moscow\'), UUID), h FixedString(2)', 10, 5, 3) +CREATE TABLE test_table_2(a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, g Tuple(Date, DateTime('Asia/Istanbul'), DateTime64(3, 'Asia/Istanbul'), UUID), h FixedString(2)) ENGINE=Memory; +INSERT INTO test_table_2 SELECT * FROM generateRandom('a Array(Int8), b UInt32, c Nullable(String), d Decimal32(4), e Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)), f Float64, g Tuple(Date, DateTime(\'Asia/Istanbul\'), DateTime64(3, \'Asia/Istanbul\'), UUID), h FixedString(2)', 10, 5, 3) LIMIT 10; SELECT a, b, c, d, e, f, g, hex(h) FROM test_table_2 ORDER BY a, b, c, d, e, f, g, h; diff --git a/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh index aa982c5478d..24638f33324 100755 --- a/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/tests/queries/0_stateless/01098_msgpack_format.sh @@ -11,7 +11,7 @@ USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonex $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Europe/Moscow'), datetime64 DateTime64(3, 'Europe/Moscow'), array Array(UInt32)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Asia/Istanbul'), datetime64 DateTime64(3, 'Asia/Istanbul'), 
array Array(UInt32)) ENGINE = Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, [1,2,3,4,5]), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, [5,4,3,2,1]), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, [42])"; diff --git a/tests/queries/0_stateless/01186_conversion_to_nullable.sql b/tests/queries/0_stateless/01186_conversion_to_nullable.sql index 828d3cac05b..52f7ee91f52 100644 --- a/tests/queries/0_stateless/01186_conversion_to_nullable.sql +++ b/tests/queries/0_stateless/01186_conversion_to_nullable.sql @@ -2,9 +2,9 @@ select toUInt8(x) from values('x Nullable(String)', '42', NULL, '0', '', '256'); select toInt64(x) from values('x Nullable(String)', '42', NULL, '0', '', '256'); select toDate(x) from values('x Nullable(String)', '2020-12-24', NULL, '0000-00-00', '', '9999-01-01'); -select toDateTime(x, 'Europe/Moscow') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); -select toDateTime64(x, 2, 'Europe/Moscow') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); -select toUnixTimestamp(x, 'Europe/Moscow') from values ('x Nullable(String)', '2000-01-01 13:12:12', NULL, ''); +select toDateTime(x, 'Asia/Istanbul') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); +select toDateTime64(x, 2, 'Asia/Istanbul') from values('x Nullable(String)', '2020-12-24 01:02:03', NULL, '0000-00-00 00:00:00', ''); +select toUnixTimestamp(x, 'Asia/Istanbul') from values ('x Nullable(String)', '2000-01-01 13:12:12', NULL, ''); select toDecimal32(x, 2) from values ('x Nullable(String)', '42', NULL, '3.14159'); select toDecimal64(x, 8) from values ('x Nullable(String)', '42', NULL, '3.14159'); diff 
--git a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql index a831fd18bfe..c5c1f2ebfd3 100644 --- a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql +++ b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper_long.sql @@ -14,7 +14,7 @@ ENGINE = MergeTree() PARTITION BY date ORDER BY key; -INSERT INTO table_rename_with_default (date, key, value1) SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Europe/Moscow'), number, toString(number) from numbers(9); +INSERT INTO table_rename_with_default (date, key, value1) SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Asia/Istanbul'), number, toString(number) from numbers(9); SELECT * FROM table_rename_with_default WHERE key = 1 FORMAT TSVWithNames; @@ -44,7 +44,7 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_01213/table_rename_wit ORDER BY tuple() TTL date2 + INTERVAL 500 MONTH; -INSERT INTO table_rename_with_ttl SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Europe/Moscow'), toDateTime(toDate('2018-10-01') + number % 3, 'Europe/Moscow'), toString(number), toString(number) from numbers(9); +INSERT INTO table_rename_with_ttl SELECT toDateTime(toDate('2019-10-01') + number % 3, 'Asia/Istanbul'), toDateTime(toDate('2018-10-01') + number % 3, 'Asia/Istanbul'), toString(number), toString(number) from numbers(9); SELECT * FROM table_rename_with_ttl WHERE value1 = '1' FORMAT TSVWithNames; diff --git a/tests/queries/0_stateless/01269_toStartOfSecond.sql b/tests/queries/0_stateless/01269_toStartOfSecond.sql index b74eaabf351..641da4a15a9 100644 --- a/tests/queries/0_stateless/01269_toStartOfSecond.sql +++ b/tests/queries/0_stateless/01269_toStartOfSecond.sql @@ -4,7 +4,7 @@ SELECT toStartOfSecond(now()); -- {serverError 43} SELECT toStartOfSecond(); -- {serverError 42} SELECT toStartOfSecond(now64(), 123); -- {serverError 43} -WITH 
toDateTime64('2019-09-16 19:20:11', 3, 'Europe/Moscow') AS dt64 SELECT toStartOfSecond(dt64, 'UTC') AS res, toTypeName(res); +WITH toDateTime64('2019-09-16 19:20:11', 3, 'Asia/Istanbul') AS dt64 SELECT toStartOfSecond(dt64, 'UTC') AS res, toTypeName(res); WITH toDateTime64('2019-09-16 19:20:11', 0, 'UTC') AS dt64 SELECT toStartOfSecond(dt64) AS res, toTypeName(res); WITH toDateTime64('2019-09-16 19:20:11.123', 3, 'UTC') AS dt64 SELECT toStartOfSecond(dt64) AS res, toTypeName(res); WITH toDateTime64('2019-09-16 19:20:11.123', 9, 'UTC') AS dt64 SELECT toStartOfSecond(dt64) AS res, toTypeName(res); diff --git a/tests/queries/0_stateless/01273_arrow_load.sh b/tests/queries/0_stateless/01273_arrow_load.sh index 2e213ce3a79..fa70255821a 100755 --- a/tests/queries/0_stateless/01273_arrow_load.sh +++ b/tests/queries/0_stateless/01273_arrow_load.sh @@ -12,7 +12,7 @@ CB_DIR=$(dirname "$CLICKHOUSE_CLIENT_BINARY") DATA_FILE=$CUR_DIR/data_arrow/test.arrow ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (bool UInt8, int8 Int8, int16 Int16, int32 Int32, int64 Int64, uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, halffloat Float32, float Float32, double Float64, string String, date32 Date, date64 DateTime('Europe/Moscow'), timestamp DateTime('Europe/Moscow')) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (bool UInt8, int8 Int8, int16 Int16, int32 Int32, int64 Int64, uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, halffloat Float32, float Float32, double Float64, string String, date32 Date, date64 DateTime('Asia/Istanbul'), timestamp DateTime('Asia/Istanbul')) ENGINE = Memory" cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "insert into arrow_load format Arrow" ${CLICKHOUSE_CLIENT} --query="select * from arrow_load" diff --git a/tests/queries/0_stateless/01277_toUnixTimestamp64.sql b/tests/queries/0_stateless/01277_toUnixTimestamp64.sql index eb3e8c612ed..42de53beb66 
100644 --- a/tests/queries/0_stateless/01277_toUnixTimestamp64.sql +++ b/tests/queries/0_stateless/01277_toUnixTimestamp64.sql @@ -12,22 +12,22 @@ SELECT toUnixTimestamp64Micro('abc', 123); -- {serverError 42} SELECT toUnixTimestamp64Nano('abc', 123); -- {serverError 42} SELECT 'const column'; -WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Europe/Moscow') AS dt64 +WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Asia/Istanbul') AS dt64 SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Europe/Moscow') AS dt64 +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Asia/Istanbul') AS dt64 SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Europe/Moscow') AS dt64 +WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Asia/Istanbul') AS dt64 SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); SELECT 'non-const column'; -WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Europe/Moscow') AS x +WITH toDateTime64('2019-09-16 19:20:12.345678910', 3, 'Asia/Istanbul') AS x SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Europe/Moscow') AS x +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6, 'Asia/Istanbul') AS x SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); -WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Europe/Moscow') AS x +WITH toDateTime64('2019-09-16 19:20:12.345678910', 9, 'Asia/Istanbul') AS x SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); diff --git a/tests/queries/0_stateless/01280_min_map_max_map.sql 
b/tests/queries/0_stateless/01280_min_map_max_map.sql index 96fdfc61929..9bc8c320d93 100644 --- a/tests/queries/0_stateless/01280_min_map_max_map.sql +++ b/tests/queries/0_stateless/01280_min_map_max_map.sql @@ -15,7 +15,7 @@ select minMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(Fixed select minMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2])); select minMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2])); select minMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2])); -select minMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); +select minMap(val, cnt) from values ('val Array(DateTime(\'Asia/Istanbul\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); select minMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2])); select minMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2])); @@ -28,7 +28,7 @@ select maxMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(Fixed select maxMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2])); select maxMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2])); select maxMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2])); -select maxMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); +select maxMap(val, cnt) from values ('val Array(DateTime(\'Asia/Istanbul\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); select maxMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2])); select maxMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2])); diff --git 
a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh index 926398e55bd..b17792af051 100755 --- a/tests/queries/0_stateless/01307_orc_output_format.sh +++ b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, fixed FixedString(4), date Date, datetime DateTime('Europe/Moscow'), decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, fixed FixedString(4), date Date, datetime DateTime('Asia/Istanbul'), decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', '2020', 18980, 1639872000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', '2000', 20000, 1839882000, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', '4242', 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; diff --git a/tests/queries/0_stateless/01379_with_fill_several_columns.sql b/tests/queries/0_stateless/01379_with_fill_several_columns.sql index 505b9e0f8e1..6bdf7d41b57 100644 --- a/tests/queries/0_stateless/01379_with_fill_several_columns.sql +++ 
b/tests/queries/0_stateless/01379_with_fill_several_columns.sql @@ -1,6 +1,6 @@ SELECT - toDate(toDateTime((number * 10) * 86400, 'Europe/Moscow')) AS d1, - toDate(toDateTime(number * 86400, 'Europe/Moscow')) AS d2, + toDate(toDateTime((number * 10) * 86400, 'Asia/Istanbul')) AS d1, + toDate(toDateTime(number * 86400, 'Asia/Istanbul')) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 @@ -11,8 +11,8 @@ ORDER BY SELECT '==============='; SELECT - toDate(toDateTime((number * 10) * 86400, 'Europe/Moscow')) AS d1, - toDate(toDateTime(number * 86400, 'Europe/Moscow')) AS d2, + toDate(toDateTime((number * 10) * 86400, 'Asia/Istanbul')) AS d1, + toDate(toDateTime(number * 86400, 'Asia/Istanbul')) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 diff --git a/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql b/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql index 2711ab3cceb..e52c2d3dd1b 100644 --- a/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql +++ b/tests/queries/0_stateless/01396_negative_datetime_saturate_to_zero.sql @@ -1 +1 @@ -SELECT toTimeZone(now(), 'Europe/Moscow') > '1970-01-01 00:00:00'; +SELECT toTimeZone(now(), 'Asia/Istanbul') > '1970-01-01 00:00:00'; diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index c11e990cea8..871d74d7fb9 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -15,7 +15,7 @@ CREATE TABLE lc_nullable ( f64 Array(LowCardinality(Nullable(Float64))), date Array(LowCardinality(Nullable(Date))), - date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), + date_time Array(LowCardinality(Nullable(DateTime('Asia/Istanbul')))), str Array(LowCardinality(Nullable(String))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))) @@ -33,8 +33,8 @@ 
INSERT INTO lc_nullable SELECT groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); @@ -51,8 +51,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(num)) AS u64, groupArray(toFloat32(num)) AS f32, groupArray(toFloat64(num)) AS f64, - groupArray(toDate(num, 'Europe/Moscow')) AS date, - groupArray(toDateTime(num, 'Europe/Moscow')) AS date_time, + groupArray(toDate(num, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(num, 'Asia/Istanbul')) AS date_time, groupArray(toString(num)) AS str, groupArray(toFixedString(toString(num), 5)) AS fixed_string FROM (SELECT negate(number) as num FROM system.numbers LIMIT 15); @@ -69,8 +69,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); @@ -87,8 +87,8 @@ INSERT INTO lc_nullable SELECT groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, - groupArray(toDate(number, 'Europe/Moscow')) AS date, - groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, + groupArray(toDate(number, 'Asia/Istanbul')) AS date, + 
groupArray(toDateTime(number, 'Asia/Istanbul')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); @@ -123,8 +123,8 @@ INSERT INTO lc_nullable SELECT [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, - [NULL, toDate(n, 'Europe/Moscow')] AS date, - [NULL, toDateTime(n, 'Europe/Moscow')] AS date_time, + [NULL, toDate(n, 'Asia/Istanbul')] AS date, + [NULL, toDateTime(n, 'Asia/Istanbul')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); @@ -140,7 +140,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 1); SELECT count() FROM lc_nullable WHERE has(f32, 1); SELECT count() FROM lc_nullable WHERE has(f64, 1); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-02')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '1'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('1', 5)); @@ -168,7 +168,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 5); SELECT count() FROM lc_nullable WHERE has(f32, 5); SELECT count() FROM lc_nullable WHERE has(f64, 5); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-06')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '5'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('5', 5)); @@ -183,7 +183,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 10); SELECT count() FROM lc_nullable WHERE has(f32, 10); SELECT count() 
FROM lc_nullable WHERE has(f64, 10); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '10'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('10', 5)); @@ -213,7 +213,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 100); SELECT count() FROM lc_nullable WHERE has(f32, 100); SELECT count() FROM lc_nullable WHERE has(f64, 100); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-04-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Europe/Moscow')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '100'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('100', 5)); diff --git a/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql b/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql index 071fefe0403..58759c8585b 100644 --- a/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql +++ b/tests/queries/0_stateless/01432_parse_date_time_best_effort_timestamp.sql @@ -1,3 +1,3 @@ -SELECT parseDateTimeBestEffort('1596752940', 'Europe/Moscow'); -SELECT parseDateTimeBestEffort('100000000', 'Europe/Moscow'); -SELECT parseDateTimeBestEffort('20200807', 'Europe/Moscow'); +SELECT parseDateTimeBestEffort('1596752940', 'Asia/Istanbul'); +SELECT parseDateTimeBestEffort('100000000', 'Asia/Istanbul'); +SELECT parseDateTimeBestEffort('20200807', 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.sql b/tests/queries/0_stateless/01440_to_date_monotonicity.sql index 8843d7ffca6..b4c4f98b223 100644 --- 
a/tests/queries/0_stateless/01440_to_date_monotonicity.sql +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.sql @@ -1,11 +1,11 @@ DROP TABLE IF EXISTS tdm; DROP TABLE IF EXISTS tdm2; -CREATE TABLE tdm (x DateTime('Europe/Moscow')) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; +CREATE TABLE tdm (x DateTime('Asia/Istanbul')) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; INSERT INTO tdm VALUES (now()); -SELECT count(x) FROM tdm WHERE toDate(x) < toDate(now(), 'Europe/Moscow') SETTINGS max_rows_to_read = 1; +SELECT count(x) FROM tdm WHERE toDate(x) < toDate(now(), 'Asia/Istanbul') SETTINGS max_rows_to_read = 1; -SELECT toDate(-1), toDate(10000000000000, 'Europe/Moscow'), toDate(100), toDate(65536, 'UTC'), toDate(65535, 'Europe/Moscow'); -SELECT toDateTime(-1, 'Europe/Moscow'), toDateTime(10000000000000, 'Europe/Moscow'), toDateTime(1000, 'Europe/Moscow'); +SELECT toDate(-1), toDate(10000000000000, 'Asia/Istanbul'), toDate(100), toDate(65536, 'UTC'), toDate(65535, 'Asia/Istanbul'); +SELECT toDateTime(-1, 'Asia/Istanbul'), toDateTime(10000000000000, 'Asia/Istanbul'), toDateTime(1000, 'Asia/Istanbul'); CREATE TABLE tdm2 (timestamp UInt32) ENGINE = MergeTree ORDER BY timestamp SETTINGS index_granularity = 1; diff --git a/tests/queries/0_stateless/01442_date_time_with_params.reference b/tests/queries/0_stateless/01442_date_time_with_params.reference index 726e59d4d35..bc819d5f8fc 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.reference +++ b/tests/queries/0_stateless/01442_date_time_with_params.reference @@ -1,6 +1,6 @@ -2020-01-01 00:00:00 DateTime 2020-01-01 00:01:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:04:00.220 DateTime64(3, \'Europe/Moscow\') 2020-01-01 00:05:00 DateTime 2020-01-01 00:06:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:06:00 DateTime -2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 
DateTime(\'Europe/Moscow\') 2020-01-01 00:04:00.220 DateTime64(3, \'Europe/Moscow\') 2020-01-01 00:05:00 DateTime -2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Europe/Moscow\') 2020-01-01 00:04:00.220 DateTime64(3, \'Europe/Moscow\') 2020-01-01 00:05:00 DateTime +2020-01-01 00:00:00 DateTime 2020-01-01 00:01:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:04:00.220 DateTime64(3, \'Asia/Istanbul\') 2020-01-01 00:05:00 DateTime 2020-01-01 00:06:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:06:00 DateTime +2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:04:00.220 DateTime64(3, \'Asia/Istanbul\') 2020-01-01 00:05:00 DateTime +2020-01-01 00:00:00 DateTime 2020-01-01 00:02:00.11 DateTime64(2) 2020-01-01 00:03:00 DateTime(\'Asia/Istanbul\') 2020-01-01 00:04:00.220 DateTime64(3, \'Asia/Istanbul\') 2020-01-01 00:05:00 DateTime 2020-01-01 00:00:00 DateTime parseDateTimeBestEffort 2020-05-14 03:37:03.000 DateTime64(3, \'UTC\') diff --git a/tests/queries/0_stateless/01442_date_time_with_params.sql b/tests/queries/0_stateless/01442_date_time_with_params.sql index 5a57aabdb0c..aeb9aa597ef 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.sql +++ b/tests/queries/0_stateless/01442_date_time_with_params.sql @@ -1,14 +1,14 @@ DROP TABLE IF EXISTS test; -CREATE TABLE test (a DateTime, b DateTime(), c DateTime(2), d DateTime('Europe/Moscow'), e DateTime(3, 'Europe/Moscow'), f DateTime32, g DateTime32('Europe/Moscow'), h DateTime(0)) ENGINE = MergeTree ORDER BY a; +CREATE TABLE test (a DateTime, b DateTime(), c DateTime(2), d DateTime('Asia/Istanbul'), e DateTime(3, 'Asia/Istanbul'), f DateTime32, g DateTime32('Asia/Istanbul'), h DateTime(0)) ENGINE = MergeTree ORDER BY a; INSERT INTO test VALUES('2020-01-01 00:00:00', '2020-01-01 00:01:00', '2020-01-01 00:02:00.11', '2020-01-01 
00:03:00', '2020-01-01 00:04:00.22', '2020-01-01 00:05:00', '2020-01-01 00:06:00', '2020-01-01 00:06:00'); SELECT a, toTypeName(a), b, toTypeName(b), c, toTypeName(c), d, toTypeName(d), e, toTypeName(e), f, toTypeName(f), g, toTypeName(g), h, toTypeName(h) FROM test; -SELECT toDateTime('2020-01-01 00:00:00') AS a, toTypeName(a), toDateTime('2020-01-01 00:02:00.11', 2) AS b, toTypeName(b), toDateTime('2020-01-01 00:03:00', 'Europe/Moscow') AS c, toTypeName(c), toDateTime('2020-01-01 00:04:00.22', 3, 'Europe/Moscow') AS d, toTypeName(d), toDateTime('2020-01-01 00:05:00', 0) AS e, toTypeName(e); +SELECT toDateTime('2020-01-01 00:00:00') AS a, toTypeName(a), toDateTime('2020-01-01 00:02:00.11', 2) AS b, toTypeName(b), toDateTime('2020-01-01 00:03:00', 'Asia/Istanbul') AS c, toTypeName(c), toDateTime('2020-01-01 00:04:00.22', 3, 'Asia/Istanbul') AS d, toTypeName(d), toDateTime('2020-01-01 00:05:00', 0) AS e, toTypeName(e); -SELECT CAST('2020-01-01 00:00:00', 'DateTime') AS a, toTypeName(a), CAST('2020-01-01 00:02:00.11', 'DateTime(2)') AS b, toTypeName(b), CAST('2020-01-01 00:03:00', 'DateTime(\'Europe/Moscow\')') AS c, toTypeName(c), CAST('2020-01-01 00:04:00.22', 'DateTime(3, \'Europe/Moscow\')') AS d, toTypeName(d), CAST('2020-01-01 00:05:00', 'DateTime(0)') AS e, toTypeName(e); +SELECT CAST('2020-01-01 00:00:00', 'DateTime') AS a, toTypeName(a), CAST('2020-01-01 00:02:00.11', 'DateTime(2)') AS b, toTypeName(b), CAST('2020-01-01 00:03:00', 'DateTime(\'Asia/Istanbul\')') AS c, toTypeName(c), CAST('2020-01-01 00:04:00.22', 'DateTime(3, \'Asia/Istanbul\')') AS d, toTypeName(d), CAST('2020-01-01 00:05:00', 'DateTime(0)') AS e, toTypeName(e); SELECT toDateTime32('2020-01-01 00:00:00') AS a, toTypeName(a); diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.queries b/tests/queries/0_stateless/01508_partition_pruning_long.queries index 786240145a9..0d64fc05f0f 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.queries +++ 
b/tests/queries/0_stateless/01508_partition_pruning_long.queries @@ -2,20 +2,20 @@ DROP TABLE IF EXISTS tMM; DROP TABLE IF EXISTS tDD; DROP TABLE IF EXISTS sDD; DROP TABLE IF EXISTS xMM; -CREATE TABLE tMM(d DateTime('Europe/Moscow'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE tMM(d DateTime('Asia/Istanbul'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES tMM; -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Europe/Moscow') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -CREATE TABLE tDD(d DateTime('Europe/Moscow'),a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER 
BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE tDD(d DateTime('Asia/Istanbul'),a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES tDD; -insert into tDD select toDateTime(toDate('2020-09-23'), 'Europe/Moscow'), number from numbers(10000) UNION ALL select toDateTime(toDateTime('2020-09-23 11:00:00', 'Europe/Moscow')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-24'), 'Europe/Moscow'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-25'), 'Europe/Moscow'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-08-15'), 'Europe/Moscow'), number from numbers(10000); +insert into tDD select toDateTime(toDate('2020-09-23'), 'Asia/Istanbul'), number from numbers(10000) UNION ALL select toDateTime(toDateTime('2020-09-23 11:00:00', 'Asia/Istanbul')), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-24'), 'Asia/Istanbul'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-09-25'), 'Asia/Istanbul'), number from numbers(10000) UNION ALL select toDateTime(toDate('2020-08-15'), 'Asia/Istanbul'), number from numbers(10000); -CREATE TABLE sDD(d UInt64,a Int) ENGINE = MergeTree PARTITION BY toYYYYMM(toDate(intDiv(d,1000), 'Europe/Moscow')) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE sDD(d UInt64,a Int) ENGINE = MergeTree PARTITION BY toYYYYMM(toDate(intDiv(d,1000), 'Asia/Istanbul')) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES sDD; insert into sDD select (1597536000+number*60)*1000, number from numbers(5000); insert into sDD select (1597536000+number*60)*1000, number from numbers(5000); @@ -24,14 +24,14 @@ insert into sDD select (1598918400+number*60)*1000, number from numbers(5000); insert into sDD select (1601510400+number*60)*1000, number from numbers(5000); insert into sDD select (1602720000+number*60)*1000, number from numbers(5000); -CREATE 
TABLE xMM(d DateTime('Europe/Moscow'),a Int64, f Int64) ENGINE = MergeTree PARTITION BY (toYYYYMM(d), a) ORDER BY tuple() SETTINGS index_granularity = 8192; +CREATE TABLE xMM(d DateTime('Asia/Istanbul'),a Int64, f Int64) ENGINE = MergeTree PARTITION BY (toYYYYMM(d), a) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES xMM; -INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, 1, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Europe/Moscow') + number*60, 2, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, 3, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Europe/Moscow') + number*60, 2, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00', 'Europe/Moscow') + number*60, 1, number FROM numbers(5000); -INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00', 'Europe/Moscow') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, 2, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, 3, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, 2, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); +INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); SELECT '--------- tMM ----------------------------'; @@ -44,8 +44,8 @@ select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; select 
uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Europe/Moscow'); -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Europe/Moscow'); +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long.reference index 9cd208a336f..afdb4257505 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.reference +++ b/tests/queries/0_stateless/01508_partition_pruning_long.reference @@ -35,11 +35,11 @@ select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d 3 15000 Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Europe/Moscow'); +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); 6 30000 Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges -select uniqExact(_part), count() from tMM where d >= 
'2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Europe/Moscow'); +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); 0 0 Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges diff --git a/tests/queries/0_stateless/01516_date_time_output_format.sql b/tests/queries/0_stateless/01516_date_time_output_format.sql index 224d8ef1035..3c99d1bb81b 100644 --- a/tests/queries/0_stateless/01516_date_time_output_format.sql +++ b/tests/queries/0_stateless/01516_date_time_output_format.sql @@ -1,16 +1,16 @@ DROP TABLE IF EXISTS test_datetime; -CREATE TABLE test_datetime(timestamp DateTime('Europe/Moscow')) ENGINE=Log; +CREATE TABLE test_datetime(timestamp DateTime('Asia/Istanbul')) ENGINE=Log; INSERT INTO test_datetime VALUES ('2020-10-15 00:00:00'); SET date_time_output_format = 'simple'; SELECT timestamp FROM test_datetime; -SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Europe/Moscow'), '%Y-%m-%d %R:%S') as formatted_simple FROM test_datetime; +SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul'), '%Y-%m-%d %R:%S') as formatted_simple FROM test_datetime; SET date_time_output_format = 'iso'; SELECT timestamp FROM test_datetime; -SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Europe/Moscow'), '%Y-%m-%dT%R:%SZ', 'UTC') as formatted_iso FROM test_datetime;; +SELECT formatDateTime(toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul'), '%Y-%m-%dT%R:%SZ', 'UTC') as formatted_iso FROM test_datetime;; SET date_time_output_format = 'unix_timestamp'; SELECT timestamp FROM test_datetime; @@ -19,7 +19,7 @@ SELECT toUnixTimestamp(timestamp) FROM test_datetime; SET date_time_output_format = 'simple'; DROP TABLE test_datetime; -CREATE TABLE test_datetime(timestamp DateTime64(3, 'Europe/Moscow')) Engine=Log; +CREATE TABLE test_datetime(timestamp DateTime64(3, 'Asia/Istanbul')) Engine=Log; INSERT 
INTO test_datetime VALUES ('2020-10-15 00:00:00'), (1602709200123); diff --git a/tests/queries/0_stateless/01582_any_join_supertype.sql b/tests/queries/0_stateless/01582_any_join_supertype.sql index 6b06d78c83c..9cd7b4397ab 100644 --- a/tests/queries/0_stateless/01582_any_join_supertype.sql +++ b/tests/queries/0_stateless/01582_any_join_supertype.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS foo; DROP TABLE IF EXISTS bar; -CREATE TABLE foo (server_date Date, server_time Datetime('Europe/Moscow'), dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); +CREATE TABLE foo (server_date Date, server_time Datetime('Asia/Istanbul'), dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); CREATE TABLE bar (server_date Date, dimension_1 String) ENGINE = MergeTree() PARTITION BY toYYYYMM(server_date) ORDER BY (server_date); INSERT INTO foo VALUES ('2020-01-01', '2020-01-01 12:00:00', 'test1'), ('2020-01-01', '2020-01-01 13:00:00', 'test2'); diff --git a/tests/queries/0_stateless/01615_two_args_function_index_fix.sql b/tests/queries/0_stateless/01615_two_args_function_index_fix.sql index dd2bde2eafc..6128bdfcdfb 100644 --- a/tests/queries/0_stateless/01615_two_args_function_index_fix.sql +++ b/tests/queries/0_stateless/01615_two_args_function_index_fix.sql @@ -1,6 +1,6 @@ drop table if exists bad_date_time; -create table bad_date_time (time Datetime('Europe/Moscow'), count UInt16) Engine = MergeTree() ORDER BY (time); +create table bad_date_time (time Datetime('Asia/Istanbul'), count UInt16) Engine = MergeTree() ORDER BY (time); insert into bad_date_time values('2020-12-20 20:59:52', 1), ('2020-12-20 21:59:52', 1), ('2020-12-20 01:59:52', 1); diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql index e8c2a0b1373..cc52859724d 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.sql +++ 
b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -30,8 +30,8 @@ SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11' SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; SELECT 'Dates'; SELECT reinterpret(0, 'Date'), reinterpret('', 'Date'); -SELECT reinterpret(0, 'DateTime(''Europe/Moscow'')'), reinterpret('', 'DateTime(''Europe/Moscow'')'); -SELECT reinterpret(0, 'DateTime64(3, ''Europe/Moscow'')'), reinterpret('', 'DateTime64(3, ''Europe/Moscow'')'); +SELECT reinterpret(0, 'DateTime(''Asia/Istanbul'')'), reinterpret('', 'DateTime(''Asia/Istanbul'')'); +SELECT reinterpret(0, 'DateTime64(3, ''Asia/Istanbul'')'), reinterpret('', 'DateTime64(3, ''Asia/Istanbul'')'); SELECT 'Decimals'; SELECT reinterpret(toDecimal32(5, 2), 'Decimal32(2)'), reinterpret('1', 'Decimal32(2)'); SELECT reinterpret(toDecimal64(5, 2), 'Decimal64(2)'), reinterpret('1', 'Decimal64(2)');; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference index 41a8d653a3f..7b3b9ae04d6 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference @@ -1,27 +1,27 @@ -- { echo } -- These values are within the extended range of DateTime64 [1925-01-01, 2284-01-01) -SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +SELECT toTimeZone(toDateTime(-2, 2), 'Asia/Istanbul'); 1970-01-01 02:59:58.00 -SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +SELECT toDateTime64(-2, 2, 'Asia/Istanbul'); 1970-01-01 02:59:58.00 -SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST(-1 AS DateTime64(0, 'Asia/Istanbul')); 1970-01-01 02:59:59 -SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul')); 2020-01-01 00:00:00 -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; -SELECT 
toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +SELECT toDateTime64(-2., 2, 'Asia/Istanbul'); 1970-01-01 03:00:00.00 -SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul'); 2106-02-07 09:28:16.00 -SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null; -- These are outsize of extended range and hence clamped -SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); +SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); 1925-01-01 02:00:00.00 -SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); +SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); 1925-01-01 02:00:00.000 -SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); +SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); 2282-12-31 03:00:00.000 -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); 2282-12-31 03:00:00.00 diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql index 2786d9c1c09..b7077aff1f7 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.sql +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql @@ -1,17 +1,17 @@ -- { echo } -- These values are within the extended range of DateTime64 [1925-01-01, 2284-01-01) -SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); -SELECT toDateTime64(-2, 2, 'Europe/Moscow'); -SELECT CAST(-1 
AS DateTime64(0, 'Europe/Moscow')); -SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; -SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); -SELECT toDateTime64(-2., 2, 'Europe/Moscow'); -SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); -SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2, 2), 'Asia/Istanbul'); +SELECT toDateTime64(-2, 2, 'Asia/Istanbul'); +SELECT CAST(-1 AS DateTime64(0, 'Asia/Istanbul')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul'); +SELECT toDateTime64(-2., 2, 'Asia/Istanbul'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul'); +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null; -- These are outsize of extended range and hence clamped -SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); -SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); -SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Europe/Moscow')); -SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Europe/Moscow'); +SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); +SELECT CAST(-1 * bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); +SELECT CAST(bitShiftLeft(toUInt64(1), 35) AS DateTime64(3, 'Asia/Istanbul')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql index fac0c341007..c08062a456c 100644 --- a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql 
+++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql @@ -1,7 +1,7 @@ -select toDateTime64(toDateTime(1, 'Europe/Moscow'), 2); +select toDateTime64(toDateTime(1, 'Asia/Istanbul'), 2); select toDateTime64(toDate(1), 2) FORMAT Null; -- Unknown timezone select toDateTime64(toDateTime(1), 2) FORMAT Null; -- Unknown timezone -select toDateTime64(toDateTime(1), 2, 'Europe/Moscow'); -select toDateTime64(toDate(1), 2, 'Europe/Moscow'); +select toDateTime64(toDateTime(1), 2, 'Asia/Istanbul'); +select toDateTime64(toDate(1), 2, 'Asia/Istanbul'); select toDateTime64(toDateTime(1), 2, 'GMT'); select toDateTime64(toDate(1), 2, 'GMT'); diff --git a/tests/queries/0_stateless/01698_fix_toMinute.reference b/tests/queries/0_stateless/01698_fix_toMinute.reference index 7675aad3a57..eb1f7eb9ca1 100644 --- a/tests/queries/0_stateless/01698_fix_toMinute.reference +++ b/tests/queries/0_stateless/01698_fix_toMinute.reference @@ -19,6 +19,6 @@ Check the bug causing situation: the special Australia/Lord_Howe time zone. 
toDa 1554569400 2019-04-07 03:20:00 2019-04-07 03:20:00 1554570000 2019-04-07 03:30:00 2019-04-07 03:30:00 1554570600 2019-04-07 03:40:00 2019-04-07 03:40:00 -4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour: +4 days test in batch comparing with manually computation result for Asia/Istanbul whose timezone epoc is of whole hour: 4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour: 4 days test in batch comparing with manually computation result for Australia/Lord_Howe whose timezone epoc is of half hour and also its DST offset is half hour: diff --git a/tests/queries/0_stateless/01698_fix_toMinute.sql b/tests/queries/0_stateless/01698_fix_toMinute.sql index f582806719d..4d11efa901d 100644 --- a/tests/queries/0_stateless/01698_fix_toMinute.sql +++ b/tests/queries/0_stateless/01698_fix_toMinute.sql @@ -3,9 +3,9 @@ SELECT 'Check the bug causing situation: the special Australia/Lord_Howe time zo SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, toString(x) as xx FROM numbers(20); /* The Batch Part. 
Test period is whole 4 days*/ -SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow whose timezone epoc is of whole hour:'; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT '4 days test in batch comparing with manually computation result for Asia/Istanbul whose timezone epoc is of whole hour:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran whose timezone epoc is of half hour:'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference index a1cc6391e6f..860829f0ce6 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.reference +++ b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -1,4 +1,4 @@ -DST boundary test for Europe/Moscow: +DST boundary test for Asia/Istanbul: 0 1981-04-01 22:40:00 14400 354998400 
1 1981-04-01 22:50:00 14400 354999000 2 1981-04-01 23:00:00 14400 354999600 @@ -70,7 +70,7 @@ DST boundary test for Australia/Lord_Howe: 15 2019-04-07 03:00:00 37800 1554568200 16 2019-04-07 03:10:00 37800 1554568800 17 2019-04-07 03:20:00 37800 1554569400 -4 days test in batch comparing with manually computation result for Europe/Moscow: +4 days test in batch comparing with manually computation result for Asia/Istanbul: 4 days test in batch comparing with manually computation result for Asia/Tehran: 4 days test in batch comparing with manually computation result for Australia/Lord_Howe Moscow DST Years: diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql index 8cabb23c4de..f9e6c2db970 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.sql +++ b/tests/queries/0_stateless/01699_timezoneOffset.sql @@ -1,8 +1,8 @@ /* Test the DST(daylight saving time) offset changing boundary*/ -SELECT 'DST boundary test for Europe/Moscow:'; -SELECT number,(toDateTime('1981-04-01 22:40:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); -SELECT number,(toDateTime('1981-09-30 23:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); +SELECT 'DST boundary test for Asia/Istanbul:'; +SELECT number,(toDateTime('1981-04-01 22:40:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('1981-09-30 23:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); SELECT 'DST boundary test for Asia/Tehran:'; SELECT number,(toDateTime('2020-03-21 22:40:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); @@ -18,9 +18,9 @@ SELECT 
number,(toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERV /* The Batch Part. Test period is whole 4 days*/ -SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow:'; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; -SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT '4 days test in batch comparing with manually computation result for Asia/Istanbul:'; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; +SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Asia/Istanbul') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran:'; SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc; @@ -34,9 +34,9 @@ SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/L /* Find all the years had followed DST during given period*/ SELECT 'Moscow DST Years:'; -SELECT number, (toDateTime('1970-06-01 00:00:00', 'Europe/Moscow') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800; +SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Istanbul') + INTERVAL number 
YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800; SELECT 'Moscow DST Years with perment DST from 2011-2014:'; -SELECT min((toDateTime('2011-01-01 00:00:00', 'Europe/Moscow') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start; +SELECT min((toDateTime('2011-01-01 00:00:00', 'Asia/Istanbul') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start; SELECT 'Tehran DST Years:'; SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Tehran') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 12600; diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql index f51a1bb2280..b0dbd1dfc84 100644 --- a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql +++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql @@ -1,4 +1,4 @@ -SELECT toString(toDateTime('-922337203.6854775808', 1, 'Europe/Moscow')); -SELECT toString(toDateTime('9922337203.6854775808', 1, 'Europe/Moscow')); -SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1, 'Europe/Moscow'); -SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1, 'Europe/Moscow'); +SELECT toString(toDateTime('-922337203.6854775808', 1, 'Asia/Istanbul')); +SELECT toString(toDateTime('9922337203.6854775808', 1, 'Asia/Istanbul')); +SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1, 'Asia/Istanbul'); +SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql index 88859177a92..7e7fe3f2e16 
100644 --- a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql +++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql @@ -5,7 +5,7 @@ INSERT INTO t VALUES (3, '1111111111222'); INSERT INTO t VALUES (4, '1111111111.222'); SELECT * FROM t ORDER BY i; -SELECT toDateTime64(1111111111.222, 3, 'Europe/Moscow'); -SELECT toDateTime64('1111111111.222', 3, 'Europe/Moscow'); -SELECT toDateTime64('1111111111222', 3, 'Europe/Moscow'); -SELECT ignore(toDateTime64(1111111111222, 3, 'Europe/Moscow')); -- This gives somewhat correct but unexpected result +SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul'); +SELECT toDateTime64('1111111111.222', 3, 'Asia/Istanbul'); +SELECT toDateTime64('1111111111222', 3, 'Asia/Istanbul'); +SELECT ignore(toDateTime64(1111111111222, 3, 'Asia/Istanbul')); -- This gives somewhat correct but unexpected result diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.sql b/tests/queries/0_stateless/01734_datetime64_from_float.sql index 416638a4a73..bb837c681e3 100644 --- a/tests/queries/0_stateless/01734_datetime64_from_float.sql +++ b/tests/queries/0_stateless/01734_datetime64_from_float.sql @@ -1,3 +1,3 @@ -SELECT CAST(1111111111.222 AS DateTime64(3, 'Europe/Moscow')); -SELECT toDateTime(1111111111.222, 3, 'Europe/Moscow'); -SELECT toDateTime64(1111111111.222, 3, 'Europe/Moscow'); +SELECT CAST(1111111111.222 AS DateTime64(3, 'Asia/Istanbul')); +SELECT toDateTime(1111111111.222, 3, 'Asia/Istanbul'); +SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01761_round_year_bounds.sql b/tests/queries/0_stateless/01761_round_year_bounds.sql index fed12c55568..57f421d155e 100644 --- a/tests/queries/0_stateless/01761_round_year_bounds.sql +++ b/tests/queries/0_stateless/01761_round_year_bounds.sql @@ -1 +1 @@ -SELECT toStartOfInterval(toDateTime(-9223372036854775808), toIntervalYear(100), 'Europe/Moscow') FORMAT Null; +SELECT 
toStartOfInterval(toDateTime(-9223372036854775808), toIntervalYear(100), 'Asia/Istanbul') FORMAT Null; diff --git a/tests/queries/0_stateless/01769_extended_range_2.sql b/tests/queries/0_stateless/01769_extended_range_2.sql index a2570c9397b..0b1319ddaea 100644 --- a/tests/queries/0_stateless/01769_extended_range_2.sql +++ b/tests/queries/0_stateless/01769_extended_range_2.sql @@ -1,3 +1,3 @@ SELECT toDateTime64('1969-12-31 18:00:12', 0, 'America/Phoenix'); SELECT toDateTime64('1969-12-30 18:00:12', 0, 'America/Phoenix'); -SELECT toDateTime64('1969-12-31 18:00:12', 0, 'Europe/Moscow'); +SELECT toDateTime64('1969-12-31 18:00:12', 0, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01772_to_start_of_hour_align.sql b/tests/queries/0_stateless/01772_to_start_of_hour_align.sql index 6d1bb460f90..5dbf6a97e57 100644 --- a/tests/queries/0_stateless/01772_to_start_of_hour_align.sql +++ b/tests/queries/0_stateless/01772_to_start_of_hour_align.sql @@ -13,8 +13,8 @@ SELECT toStartOfInterval(toDateTime('2021-03-23 13:58:00', 'Asia/Kolkata'), INTE -- In case of timezone shifts, rounding is performed to the hour number on "wall clock" time. -- The intervals may become shorter or longer due to time shifts. For example, the three hour interval may actually last two hours. -- If the same hour number on "wall clock" time correspond to multiple time points due to shifting backwards, the unspecified time point is selected among the candidates. 
-SELECT toDateTime('2010-03-28 00:00:00', 'Europe/Moscow') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); -SELECT toDateTime('2010-10-31 00:00:00', 'Europe/Moscow') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); +SELECT toDateTime('2010-03-28 00:00:00', 'Asia/Istanbul') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); +SELECT toDateTime('2010-10-31 00:00:00', 'Asia/Istanbul') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); -- And this should work even for non whole number of hours shifts. SELECT toDateTime('2020-04-05 00:00:00', 'Australia/Lord_Howe') + INTERVAL 15 * number MINUTE AS src, toStartOfInterval(src, INTERVAL 2 HOUR) AS rounded, toUnixTimestamp(src) AS t FROM numbers(20); diff --git a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference index 75c114cdd74..bf717d7da0b 100644 --- a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference +++ b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.reference @@ -1,27 +1,27 @@ -- { echo } -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 19 -SELECT 
formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); 20 -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 21 -SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); 22 -- non-zero scale -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 19 -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); 20 -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 21 -SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); 22 diff --git a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql index e368f45cbda..712afd28cd6 100644 --- a/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql +++ b/tests/queries/0_stateless/01802_formatDateTime_DateTime64_century.sql @@ -1,16 +1,16 @@ -- { echo } -SELECT 
formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C'); +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'), '%C'); -- non-zero scale -SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C'); -SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C'); \ No newline at end of file +SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2105-12-12 
12:12:12', 6, 'Asia/Istanbul'), '%C'); +SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Asia/Istanbul'), '%C'); \ No newline at end of file diff --git a/tests/queries/0_stateless/01802_toDateTime64_large_values.reference b/tests/queries/0_stateless/01802_toDateTime64_large_values.reference index c44c61ab93a..e60b1c30314 100644 --- a/tests/queries/0_stateless/01802_toDateTime64_large_values.reference +++ b/tests/queries/0_stateless/01802_toDateTime64_large_values.reference @@ -2,9 +2,9 @@ SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC'); 2205-12-12 12:12:12 -SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'); 2205-12-12 12:12:12 -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); 2205-12-12 12:12:12.000000 -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); 2205-12-12 12:12:12.000000 diff --git a/tests/queries/0_stateless/01802_toDateTime64_large_values.sql b/tests/queries/0_stateless/01802_toDateTime64_large_values.sql index 299111f43bc..d82d4433b2d 100644 --- a/tests/queries/0_stateless/01802_toDateTime64_large_values.sql +++ b/tests/queries/0_stateless/01802_toDateTime64_large_values.sql @@ -1,7 +1,7 @@ -- { echo } SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC'); -SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'); +SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Asia/Istanbul'); -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); -SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow'); \ No newline at end of file +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); +SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Asia/Istanbul'); \ No newline at end of file diff --git a/tests/queries/0_stateless/01811_datename.sql b/tests/queries/0_stateless/01811_datename.sql index 
0cd538b52c7..b757d9ae018 100644 --- a/tests/queries/0_stateless/01811_datename.sql +++ b/tests/queries/0_stateless/01811_datename.sql @@ -66,7 +66,7 @@ SELECT WITH toDateTime('2021-04-14 23:22:33', 'UTC') as date SELECT - dateName('weekday', date, 'Europe/Moscow'), - dateName('hour', date, 'Europe/Moscow'), - dateName('minute', date, 'Europe/Moscow'), - dateName('second', date, 'Europe/Moscow'); + dateName('weekday', date, 'Asia/Istanbul'), + dateName('hour', date, 'Asia/Istanbul'), + dateName('minute', date, 'Asia/Istanbul'), + dateName('second', date, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01821_to_date_time_ubsan.sql b/tests/queries/0_stateless/01821_to_date_time_ubsan.sql index 377291e015f..5ec767fe413 100644 --- a/tests/queries/0_stateless/01821_to_date_time_ubsan.sql +++ b/tests/queries/0_stateless/01821_to_date_time_ubsan.sql @@ -1,2 +1,2 @@ -SELECT toDateTime('9223372036854775806', 7, 'Europe/Moscow'); -SELECT toDateTime('9223372036854775806', 8, 'Europe/Moscow'); +SELECT toDateTime('9223372036854775806', 7, 'Asia/Istanbul'); +SELECT toDateTime('9223372036854775806', 8, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01852_map_combinator.sql b/tests/queries/0_stateless/01852_map_combinator.sql index 3036e2e0ea4..a23a507bc27 100644 --- a/tests/queries/0_stateless/01852_map_combinator.sql +++ b/tests/queries/0_stateless/01852_map_combinator.sql @@ -26,7 +26,7 @@ select minMap(val) from values ('val Map(String, String)', (map('1', '1')), (ma select minMap(val) from values ('val Map(FixedString(1), FixedString(1))', (map('1', '1')), (map('1', '2'))); select minMap(val) from values ('val Map(UInt64, UInt64)', (map(1, 1)), (map(1, 2))); select minMap(val) from values ('val Map(Date, Int16)', (map(1, 1)), (map(1, 2))); -select minMap(val) from values ('val Map(DateTime(\'Europe/Moscow\'), Int32)', (map(1, 1)), (map(1, 2))); +select minMap(val) from values ('val Map(DateTime(\'Asia/Istanbul\'), Int32)', (map(1, 1)), (map(1, 2))); select 
minMap(val) from values ('val Map(Enum16(\'a\'=1), Int16)', (map('a', 1)), (map('a', 2))); select maxMap(val) from values ('val Map(String, String)', (map('1', '1')), (map('1', '2'))); select minMap(val) from values ('val Map(Int128, Int128)', (map(1, 1)), (map(1, 2))); diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql index 1aea0fb91f2..2f0ed1fdc7f 100644 --- a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql +++ b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql @@ -1,5 +1,5 @@ drop table if exists replacing; -create table replacing( `A` Int64, `D` DateTime64(9, 'Europe/Moscow'), `S` String) ENGINE = ReplacingMergeTree(D) ORDER BY A; +create table replacing( `A` Int64, `D` DateTime64(9, 'Asia/Istanbul'), `S` String) ENGINE = ReplacingMergeTree(D) ORDER BY A; insert into replacing values (1,'1970-01-01 08:25:46.300800000','a'); insert into replacing values (2,'1970-01-01 08:25:46.300800002','b'); diff --git a/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql b/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql index ff3134d37ed..3a49ef73d1a 100644 --- a/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql +++ b/tests/queries/0_stateless/01868_order_by_fill_with_datetime64.sql @@ -1,2 +1,2 @@ -SELECT n, source FROM (SELECT toDateTime64(number * 1000, 3,'Europe/Moscow') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 3); -SELECT n, source FROM (SELECT toDateTime64(number * 1000, 9,'Europe/Moscow') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 9); +SELECT n, source FROM (SELECT toDateTime64(number * 1000, 3,'Asia/Istanbul') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 
3); +SELECT n, source FROM (SELECT toDateTime64(number * 1000, 9,'Asia/Istanbul') AS n, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ) ORDER BY n ASC WITH FILL STEP toDateTime64(1000, 9); diff --git a/tests/queries/0_stateless/01891_partition_hash.sql b/tests/queries/0_stateless/01891_partition_hash.sql index f401c7c2d07..f56ed6a4ff4 100644 --- a/tests/queries/0_stateless/01891_partition_hash.sql +++ b/tests/queries/0_stateless/01891_partition_hash.sql @@ -1,5 +1,5 @@ drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Europe/Moscow'), dt64 DateTime64(3, 'Europe/Moscow'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); +create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. 
Old partitions will not be accepted by new server. diff --git a/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql b/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql index 643266f1ea3..431f566b806 100644 --- a/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql +++ b/tests/queries/0_stateless/01891_partition_hash_no_long_int.sql @@ -1,7 +1,7 @@ -- Tags: long drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Europe/Moscow'), dt64 DateTime64(3, 'Europe/Moscow'), dec128 Decimal128(3), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, u8, u16, u32, u64, id, s, fs, a, t, d, dt, dt64, dec128, lc) order by tuple(); +create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, u8, u16, u32, u64, id, s, fs, a, t, d, dt, dt64, dec128, lc) order by tuple(); insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. Old partitions will not be accepted by new server. 
diff --git a/tests/queries/0_stateless/01905_to_json_string.sql b/tests/queries/0_stateless/01905_to_json_string.sql index e92c32f3422..38c02ef13fb 100644 --- a/tests/queries/0_stateless/01905_to_json_string.sql +++ b/tests/queries/0_stateless/01905_to_json_string.sql @@ -8,7 +8,7 @@ $$ d Decimal32(4), e Nullable(Enum16('h' = 1, 'w' = 5 , 'o' = -200)), f Float64, - g Tuple(Date, DateTime('Europe/Moscow'), DateTime64(3, 'Europe/Moscow'), UUID), + g Tuple(Date, DateTime('Asia/Istanbul'), DateTime64(3, 'Asia/Istanbul'), UUID), h FixedString(2), i Array(Nullable(UUID)) $$, 10, 5, 3) limit 2; diff --git a/tests/queries/0_stateless/01921_datatype_date32.sql b/tests/queries/0_stateless/01921_datatype_date32.sql index e01bdfeee8d..49e5366b455 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.sql +++ b/tests/queries/0_stateless/01921_datatype_date32.sql @@ -23,7 +23,7 @@ select toMinute(x1) from t1; -- { serverError 43 } select '-------toSecond---------'; select toSecond(x1) from t1; -- { serverError 43 } select '-------toStartOfDay---------'; -select toStartOfDay(x1, 'Europe/Moscow') from t1; +select toStartOfDay(x1, 'Asia/Istanbul') from t1; select '-------toMonday---------'; select toMonday(x1) from t1; select '-------toISOWeek---------'; @@ -57,21 +57,21 @@ select toStartOfHour(x1) from t1; -- { serverError 43 } select '-------toStartOfISOYear---------'; select toStartOfISOYear(x1) from t1; select '-------toRelativeYearNum---------'; -select toRelativeYearNum(x1, 'Europe/Moscow') from t1; +select toRelativeYearNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeQuarterNum---------'; -select toRelativeQuarterNum(x1, 'Europe/Moscow') from t1; +select toRelativeQuarterNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeMonthNum---------'; -select toRelativeMonthNum(x1, 'Europe/Moscow') from t1; +select toRelativeMonthNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeWeekNum---------'; -select toRelativeWeekNum(x1, 'Europe/Moscow') from t1; 
+select toRelativeWeekNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeDayNum---------'; -select toRelativeDayNum(x1, 'Europe/Moscow') from t1; +select toRelativeDayNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeHourNum---------'; -select toRelativeHourNum(x1, 'Europe/Moscow') from t1; +select toRelativeHourNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeMinuteNum---------'; -select toRelativeMinuteNum(x1, 'Europe/Moscow') from t1; +select toRelativeMinuteNum(x1, 'Asia/Istanbul') from t1; select '-------toRelativeSecondNum---------'; -select toRelativeSecondNum(x1, 'Europe/Moscow') from t1; +select toRelativeSecondNum(x1, 'Asia/Istanbul') from t1; select '-------toTime---------'; select toTime(x1) from t1; -- { serverError 43 } select '-------toYYYYMM---------'; diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.sql b/tests/queries/0_stateless/01925_date_date_time_comparison.sql index 13e856384d2..0659d85b028 100644 --- a/tests/queries/0_stateless/01925_date_date_time_comparison.sql +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.sql @@ -1,2 +1,2 @@ -SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Europe/Moscow'); -SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 0, 'Europe/Moscow'); +SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Asia/Istanbul'); +SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 0, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.reference b/tests/queries/0_stateless/01926_date_date_time_supertype.reference index ec9933dfbd2..e4e8ddfceab 100644 --- a/tests/queries/0_stateless/01926_date_date_time_supertype.reference +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.reference @@ -1,12 +1,12 @@ Array -Array(DateTime(\'Europe/Moscow\')) -Array(DateTime64(5, \'Europe/Moscow\')) -Array(DateTime64(6, \'Europe/Moscow\')) +Array(DateTime(\'Asia/Istanbul\')) 
+Array(DateTime64(5, \'Asia/Istanbul\')) +Array(DateTime64(6, \'Asia/Istanbul\')) If -2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') -2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') -2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') +2000-01-01 00:00:00 DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00 DateTime(\'Asia/Istanbul\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Asia/Istanbul\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Asia/Istanbul\') Cast 2000-01-01 00:00:00 DateTime(\'UTC\') 2000-01-01 00:00:00.00000 DateTime64(5, \'UTC\') diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.sql b/tests/queries/0_stateless/01926_date_date_time_supertype.sql index cce488a5cff..756fd04a01f 100644 --- a/tests/queries/0_stateless/01926_date_date_time_supertype.sql +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.sql @@ -1,8 +1,8 @@ SELECT 'Array'; -SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow')]); -SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow')]); -SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow'), toDateTime64('2000-01-01', 6, 'Europe/Moscow')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Asia/Istanbul')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Asia/Istanbul'), toDateTime64('2000-01-01', 5, 'Asia/Istanbul')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Asia/Istanbul'), toDateTime64('2000-01-01', 5, 'Asia/Istanbul'), toDateTime64('2000-01-01', 6, 'Asia/Istanbul')]); DROP TABLE IF EXISTS predicate_table; CREATE TABLE predicate_table (value UInt8) ENGINE=TinyLog; @@ -11,11 +11,11 @@ INSERT INTO predicate_table VALUES (0), (1); SELECT 'If'; -WITH toDate('2000-01-01') as a, toDateTime('2000-01-01', 
'Europe/Moscow') as b +WITH toDate('2000-01-01') as a, toDateTime('2000-01-01', 'Asia/Istanbul') as b SELECT if(value, b, a) as result, toTypeName(result) FROM predicate_table; -WITH toDateTime('2000-01-01', 'Europe/Moscow') as a, toDateTime64('2000-01-01', 5, 'Europe/Moscow') as b +WITH toDateTime('2000-01-01', 'Asia/Istanbul') as a, toDateTime64('2000-01-01', 5, 'Asia/Istanbul') as b SELECT if(value, b, a) as result, toTypeName(result) FROM predicate_table; diff --git a/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql b/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql index 9a25f2b007b..05e5a090d86 100644 --- a/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql +++ b/tests/queries/0_stateless/02041_conversion_between_date32_and_datetime64.sql @@ -1 +1 @@ -select toDate32(toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow')), toDateTime64(toDate32('2019-01-01'), 3, 'Europe/Moscow') \ No newline at end of file +select toDate32(toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul')), toDateTime64(toDate32('2019-01-01'), 3, 'Asia/Istanbul') \ No newline at end of file diff --git a/tests/queries/0_stateless/02096_date_time_1970_saturation.sql b/tests/queries/0_stateless/02096_date_time_1970_saturation.sql index e0c401443a7..8cd472c4e6c 100644 --- a/tests/queries/0_stateless/02096_date_time_1970_saturation.sql +++ b/tests/queries/0_stateless/02096_date_time_1970_saturation.sql @@ -1,21 +1,21 @@ select toDate(0); -select toDateTime(0, 'Europe/Moscow'); +select toDateTime(0, 'Asia/Istanbul'); select toMonday(toDate(0)); -select toMonday(toDateTime(0, 'Europe/Moscow')); +select toMonday(toDateTime(0, 'Asia/Istanbul')); select toStartOfWeek(toDate(0)); -select toStartOfWeek(toDateTime(0, 'Europe/Moscow')); +select toStartOfWeek(toDateTime(0, 'Asia/Istanbul')); select toStartOfMonth(toDate(0)); -select toStartOfMonth(toDateTime(0, 'Europe/Moscow')); +select toStartOfMonth(toDateTime(0, 
'Asia/Istanbul')); select toStartOfQuarter(toDate(0)); -select toStartOfQuarter(toDateTime(0, 'Europe/Moscow')); +select toStartOfQuarter(toDateTime(0, 'Asia/Istanbul')); select toStartOfYear(toDate(0)); -select toStartOfYear(toDateTime(0, 'Europe/Moscow')); -select toTime(toDateTime(0, 'Europe/Moscow')); -select toStartOfMinute(toDateTime(0, 'Europe/Moscow')); -select toStartOfFiveMinute(toDateTime(0, 'Europe/Moscow')); -select toStartOfTenMinutes(toDateTime(0, 'Europe/Moscow')); -select toStartOfFifteenMinutes(toDateTime(0, 'Europe/Moscow')); -select toStartOfHour(toDateTime(0, 'Europe/Moscow')); +select toStartOfYear(toDateTime(0, 'Asia/Istanbul')); +select toTime(toDateTime(0, 'Asia/Istanbul')); +select toStartOfMinute(toDateTime(0, 'Asia/Istanbul')); +select toStartOfFiveMinute(toDateTime(0, 'Asia/Istanbul')); +select toStartOfTenMinutes(toDateTime(0, 'Asia/Istanbul')); +select toStartOfFifteenMinutes(toDateTime(0, 'Asia/Istanbul')); +select toStartOfHour(toDateTime(0, 'Asia/Istanbul')); select toDateTime(0, 'America/Los_Angeles'); select toMonday(toDateTime(0, 'America/Los_Angeles')); diff --git a/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql index 7f62e187241..df5499df32a 100644 --- a/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql +++ b/tests/queries/0_stateless/02176_toStartOfWeek_overflow_pruning.sql @@ -1,5 +1,5 @@ SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'UTC')); -SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Europe/Moscow')); +SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Asia/Istanbul')); SELECT toStartOfWeek(toDateTime('1970-01-01 00:00:00', 'Canada/Atlantic')); SELECT toStartOfWeek(toDateTime('1970-01-04 00:00:00')); diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index d129ccc801e..c0463343956 100644 --- 
a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -38,9 +38,9 @@ SELECT sum(number) FROM numbers3; SHOW CREATE TABLE numbers3; DROP TABLE numbers3; -CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Europe/Moscow'), UTCEventTime DateTime('UTC')) PARTITION BY EventDate PRIMARY KEY CounterID; +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Asia/Istanbul'), UTCEventTime DateTime('UTC')) PARTITION BY EventDate PRIMARY KEY CounterID; SET default_table_engine = 'Memory'; -CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Europe/Moscow')) AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Asia/Istanbul')) AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, CounterID UInt32) POPULATE AS SELECT CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; SHOW CREATE TABLE test_view_filtered; INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); diff --git a/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns b/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns index 794ee47d757..3bf762ed7d5 100644 --- a/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/alltypes_list.parquet.columns @@ -1 +1 @@ -`a1` Array(Int8), `a2` Array(UInt8), `a3` Array(Int16), `a4` Array(UInt16), `a5` Array(Int32), `a6` Array(UInt32), `a7` Array(Int64), `a8` Array(UInt64), `a9` Array(String), `a10` Array(FixedString(4)), `a11` Array(Float32), `a12` Array(Float64), `a13` Array(Date), `a14` Array(Datetime('Europe/Moscow')), `a15` Array(Decimal(4, 2)), `a16` Array(Decimal(10, 2)), `a17` 
Array(Decimal(25, 2)) +`a1` Array(Int8), `a2` Array(UInt8), `a3` Array(Int16), `a4` Array(UInt16), `a5` Array(Int32), `a6` Array(UInt32), `a7` Array(Int64), `a8` Array(UInt64), `a9` Array(String), `a10` Array(FixedString(4)), `a11` Array(Float32), `a12` Array(Float64), `a13` Array(Date), `a14` Array(Datetime('Asia/Istanbul')), `a15` Array(Decimal(4, 2)), `a16` Array(Decimal(10, 2)), `a17` Array(Decimal(25, 2)) diff --git a/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns b/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns index df35127ede8..c6d754f04c7 100644 --- a/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns @@ -1 +1 @@ -`a` Nullable(Int64), `b` Nullable(Float64), `c` Nullable(DateTime('Europe/Moscow')), `index` Nullable(String), `__index_level_1__` Nullable(DateTime('Europe/Moscow')) +`a` Nullable(Int64), `b` Nullable(Float64), `c` Nullable(DateTime('Asia/Istanbul')), `index` Nullable(String), `__index_level_1__` Nullable(DateTime('Asia/Istanbul')) diff --git a/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py b/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py index 92606c9cb26..a1ce8ed7e65 100755 --- a/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py +++ b/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py @@ -4,8 +4,8 @@ import json import sys TYPE_PARQUET_CONVERTED_TO_CLICKHOUSE = { - "TIMESTAMP_MICROS": "DateTime('Europe/Moscow')", - "TIMESTAMP_MILLIS": "DateTime('Europe/Moscow')", + "TIMESTAMP_MICROS": "DateTime('Asia/Istanbul')", + "TIMESTAMP_MILLIS": "DateTime('Asia/Istanbul')", "UTF8": "String", } From 66c026d0ac4fffa3171de008b411a83639140f25 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Fri, 25 Feb 2022 20:44:13 +0300 Subject: [PATCH 002/615] Fix 
collision of S3 operation log revision --- src/Disks/DiskDecorator.h | 3 + src/Disks/IDisk.h | 8 + src/Disks/S3/DiskS3.cpp | 52 ++++- src/Disks/S3/DiskS3.h | 16 +- src/Disks/S3/registerDiskS3.cpp | 4 +- src/Storages/MergeTree/DataPartsExchange.cpp | 21 ++ .../__init__.py | 0 .../configs/config.d/clusters.xml | 40 ++++ .../configs/config.d/storage_conf.xml | 35 ++++ .../config.d/storage_conf_another_bucket.xml | 34 +++ .../storage_conf_without_zero_copy.xml | 35 ++++ .../test.py | 195 ++++++++++++++++++ 12 files changed, 435 insertions(+), 8 deletions(-) create mode 100644 tests/integration/test_replicated_merge_tree_s3_restore/__init__.py create mode 100644 tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/clusters.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_without_zero_copy.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3_restore/test.py diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index bace54ff22a..32ebde3ac43 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -78,6 +78,9 @@ public: UInt32 getRefCount(const String & path) const override { return delegate->getRefCount(path); } + void syncRevision(UInt64 revision) override { delegate->syncRevision(revision); } + UInt64 getRevision() const override { return delegate->getRevision(); } + protected: Executor & getExecutor() override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 5068ac5dde9..16bb111b1eb 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -304,6 +304,14 @@ public: /// other alive harlinks will not be removed. 
virtual UInt32 getRefCount(const String &) const { return 0; } + /// Revision is an incremental counter of disk operaion. + /// Revision currently exisis only in DiskS3. + /// It is used to save current state during backup and restore that state from backup. + /// This methos sets current disk revision if it lower than required. + virtual void syncRevision(UInt64) {} + /// Return current disk revision. + virtual UInt64 getRevision() const { return 0; } + protected: friend class DiskDecorator; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index b7e752edafd..87c95a91deb 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -155,12 +155,14 @@ DiskS3::DiskS3( DiskPtr metadata_disk_, ContextPtr context_, SettingsPtr settings_, - GetDiskSettings settings_getter_) + GetDiskSettings settings_getter_, + String operation_log_suffix_) : IDiskRemote(name_, s3_root_path_, metadata_disk_, "DiskS3", settings_->thread_pool_size) , bucket(std::move(bucket_)) , current_settings(std::move(settings_)) , settings_getter(settings_getter_) , context(context_) + , operation_log_suffix(operation_log_suffix_) { } @@ -331,7 +333,7 @@ void DiskS3::shutdown() void DiskS3::createFileOperationObject(const String & operation_name, UInt64 revision, const DiskS3::ObjectMetadata & metadata) { auto settings = current_settings.get(); - const String key = "operations/r" + revisionToString(revision) + "-" + operation_name; + const String key = "operations/r" + revisionToString(revision) + operation_log_suffix + "-" + operation_name; WriteBufferFromS3 buffer( settings->client, bucket, @@ -909,6 +911,36 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so } } +void DiskS3::moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata) +{ + if (exists(to_path)) + { + if (send_metadata) + { + auto revision = ++revision_counter; + const ObjectMetadata object_metadata { + {"from_path", from_path}, + {"to_path", 
to_path} + }; + createFileOperationObject("rename", revision, object_metadata); + } + if (isDirectory(from_path)) + { + for (auto it = iterateDirectory(from_path); it->isValid(); it->next()) + moveRecursiveOrRemove(it->path(), fs::path(to_path) / it->name(), false); + } + else + { + removeFile(from_path); + LOG_WARNING(log, "Collision in S3 operation log: rename from '{}' to '{}', file removed", from_path, to_path); + } + } + else + { + moveFile(from_path, to_path, send_metadata); + } +} + void DiskS3::restoreFileOperations(const RestoreInformation & restore_information) { auto settings = current_settings.get(); @@ -951,7 +983,7 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio auto to_path = object_metadata["to_path"]; if (exists(from_path)) { - moveFile(from_path, to_path, send_metadata); + moveRecursiveOrRemove(from_path, to_path, send_metadata); LOG_TRACE(log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); if (restore_information.detached && isDirectory(to_path)) @@ -1034,9 +1066,10 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio std::tuple DiskS3::extractRevisionAndOperationFromKey(const String & key) { String revision_str; + String suffix; String operation; - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + re2::RE2::FullMatch(key, key_regexp, &revision_str, &suffix, &operation); return {(revision_str.empty() ? 
UNKNOWN_REVISION : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; } @@ -1079,6 +1112,17 @@ void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, exec->setMaxThreads(current_settings.get()->thread_pool_size); } +void DiskS3::syncRevision(UInt64 revision) +{ + UInt64 local_revision = revision_counter.load(); + while ((revision > local_revision) && revision_counter.compare_exchange_weak(local_revision, revision)); +} + +UInt64 DiskS3::getRevision() const +{ + return revision_counter.load(); +} + DiskS3Settings::DiskS3Settings( const std::shared_ptr & client_, size_t s3_max_single_read_retries_, diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 698fa6173c2..8095b281641 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -75,7 +75,8 @@ public: DiskPtr metadata_disk_, ContextPtr context_, SettingsPtr settings_, - GetDiskSettings settings_getter_); + GetDiskSettings settings_getter_, + String operation_log_suffix_); std::unique_ptr readFile( const String & path, @@ -117,6 +118,9 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; + void syncRevision(UInt64 revision) override; + UInt64 getRevision() const override; + private: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata); /// Converts revision to binary string with leading zeroes (64 bit). 
@@ -159,6 +163,10 @@ private: /// Forms detached path '../../detached/part_name/' from '../../part_name/' static String pathToDetached(const String & source_path); + /// Move file or files in directory when possible and remove files in other case + /// to restore by S3 operation log with same operations from different replicas + void moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata); + const String bucket; MultiVersion current_settings; @@ -172,8 +180,8 @@ private: /// File at path {metadata_path}/restore contains metadata restore information inline static const String RESTORE_FILE_NAME = "restore"; - /// Key has format: ../../r{revision}-{operation} - const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + /// Key has format: ../../r{revision}(-{hostname})-{operation} + const re2::RE2 key_regexp {".*/r(\\d+)(-[\\w\\d\\-\\.]+)?-(\\w+)$"}; /// Object contains information about schema version. inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; @@ -183,6 +191,8 @@ private: const std::vector data_roots {"data", "store"}; ContextPtr context; + + String operation_log_suffix; }; } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 9b2e7137d53..9f2fc5dd5d1 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -9,6 +9,7 @@ #if USE_AWS_S3 #include +#include #include #include "DiskS3.h" #include "Disks/DiskCacheWrapper.h" @@ -192,7 +193,8 @@ void registerDiskS3(DiskFactory & factory) metadata_disk, context, getSettings(config, config_prefix, context), - getSettings); + getSettings, + "-" + getFQDNOrHostName()); /// This code is used only to check access to the corresponding disk. 
if (!config.getBool(config_prefix + ".skip_access_check", false)) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 19d990d7c2d..b22aa91af30 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -141,6 +141,16 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedSend}; + { + auto disk = part->volume->getDisk(); + UInt64 revision = parse(params.get("disk_revision", "0")); + if (revision) + disk->syncRevision(revision); + revision = disk->getRevision(); + if (revision) + response.addCookie({"disk_revision", toString(revision)}); + } + if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) writeBinary(part->checksums.getTotalSizeOnDisk(), out); @@ -419,6 +429,13 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( {"compress", "false"} }); + if (disk) + { + UInt64 revision = disk->getRevision(); + if (revision) + uri.addQueryParameter("disk_revision", toString(revision)); + } + Strings capability; if (try_zero_copy && data_settings->allow_remote_fs_zero_copy_replication) { @@ -503,6 +520,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); + UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); + if (revision) + disk->syncRevision(revision); + bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/__init__.py b/tests/integration/test_replicated_merge_tree_s3_restore/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/clusters.xml 
b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/clusters.xml new file mode 100644 index 00000000000..31f7e06db3c --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/clusters.xml @@ -0,0 +1,40 @@ + + + + + + true + + node1z + 9000 + + + node2z + 9000 + + + + + + true + + node1n + 9000 + + + node2n + 9000 + + + + + + true + + node_another_bucket + 9000 + + + + + diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..63fe7ac1769 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf.xml @@ -0,0 +1,35 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + 1 + +
diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml new file mode 100644 index 00000000000..920db2c9edd --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root2/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_without_zero_copy.xml b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_without_zero_copy.xml new file mode 100644 index 00000000000..9194d779d16 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_restore/configs/config.d/storage_conf_without_zero_copy.xml @@ -0,0 +1,35 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + 0 + +
diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/test.py b/tests/integration/test_replicated_merge_tree_s3_restore/test.py new file mode 100644 index 00000000000..fbf595644f7 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3_restore/test.py @@ -0,0 +1,195 @@ +import os +import logging +import random +import string +import time + +import pytest +from helpers.cluster import ClickHouseCluster, get_instances_dir + + +COMMON_CONFIGS = ["configs/config.d/clusters.xml"] + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + + cluster.add_instance("node1z", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf.xml"], + macros={"cluster": "node_zero_copy", "replica": "0"}, + with_minio=True, with_zookeeper=True, stay_alive=True) + cluster.add_instance("node2z", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf.xml"], + macros={"cluster": "node_zero_copy", "replica": "1"}, + with_zookeeper=True, stay_alive=True) + cluster.add_instance("node1n", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf_without_zero_copy.xml"], + macros={"cluster": "node_no_zero_copy", "replica": "2"}, + with_minio=True, with_zookeeper=True, stay_alive=True) + cluster.add_instance("node2n", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf_without_zero_copy.xml"], + macros={"cluster": "node_no_zero_copy", "replica": "3"}, + with_zookeeper=True, stay_alive=True) + cluster.add_instance("node_another_bucket", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf_another_bucket.xml"], + macros={"cluster": "node_another_bucket", "replica": "0"}, + with_zookeeper=True, stay_alive=True) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def random_string(length): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(length)) + + +def 
create_table(node, table_name, schema, attach=False, db_atomic=False, uuid=""): + node.query("CREATE DATABASE IF NOT EXISTS s3 {on_cluster} ENGINE = {engine}".format(engine="Atomic" if db_atomic else "Ordinary", + on_cluster="ON CLUSTER '{cluster}'")) + + create_table_statement = """ + {create} TABLE s3.{table_name} {uuid} {on_cluster} ( + key UInt32, + {schema} + ) ENGINE={engine} + PARTITION BY key + ORDER BY key + SETTINGS + storage_policy='s3', + old_parts_lifetime=600, + index_granularity=512 + """.format(create="ATTACH" if attach else "CREATE", + table_name=table_name, + uuid="UUID '{uuid}'".format(uuid=uuid) if db_atomic and uuid else "", + on_cluster="ON CLUSTER '{cluster}'", + schema=schema, + engine="ReplicatedMergeTree('/clickhouse/tables/{cluster}/test', '{replica}')") + + node.query(create_table_statement) + + +def purge_s3(cluster, bucket): + minio = cluster.minio_client + for obj in list(minio.list_objects(bucket, recursive=True)): + if str(obj.object_name).find(".SCHEMA_VERSION") != -1: + continue + minio.remove_object(bucket, obj.object_name) + + +def drop_s3_metadata(node): + node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/disks/s3/*'], user='root') + + +def drop_shadow_information(node): + node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/shadow/*'], user='root') + + +def create_restore_file(node, revision=None, bucket=None, path=None, detached=None): + node.exec_in_container(['bash', '-c', 'mkdir -p /var/lib/clickhouse/disks/s3/'], user='root') + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/disks/s3/restore'], user='root') + + add_restore_option = 'echo -en "{}={}\n" >> /var/lib/clickhouse/disks/s3/restore' + if revision: + node.exec_in_container(['bash', '-c', add_restore_option.format('revision', revision)], user='root') + if bucket: + node.exec_in_container(['bash', '-c', add_restore_option.format('source_bucket', bucket)], user='root') + if path: + node.exec_in_container(['bash', '-c', 
add_restore_option.format('source_path', path)], user='root') + if detached: + node.exec_in_container(['bash', '-c', add_restore_option.format('detached', 'true')], user='root') + + +def get_revision_counter(node, backup_number): + return int(node.exec_in_container( + ['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root')) + + +def get_table_uuid(node, db_atomic, table): + uuid = "" + if db_atomic: + uuid = node.query("SELECT uuid FROM system.tables WHERE database='s3' AND table='{}' FORMAT TabSeparated".format(table)).strip() + return uuid + + +@pytest.fixture(autouse=True) +def drop_table(cluster): + yield + + node_names = ["node1z", "node2z", "node1n", "node2n", "node_another_bucket"] + + for node_name in node_names: + node = cluster.instances[node_name] + node.query("DROP TABLE IF EXISTS s3.test SYNC") + node.query("DROP DATABASE IF EXISTS s3 SYNC") + + drop_s3_metadata(node) + drop_shadow_information(node) + + buckets = [cluster.minio_bucket, cluster.minio_bucket_2] + for bucket in buckets: + purge_s3(cluster, bucket) + + +@pytest.mark.parametrize( + "db_atomic", [False, True] +) +@pytest.mark.parametrize( + "zero_copy", [False, True] +) +def test_restore_another_bucket_path(cluster, db_atomic, zero_copy): + suffix = "z" if zero_copy else "n" + nodes = [cluster.instances[f"node1{suffix}"], cluster.instances[f"node2{suffix}"]] + + keys = 100 + data_columns = 10 + size = 1 + + columns = [] + for c in range(0, data_columns): + columns.append("data{c} String".format(c=c)) + schema = ", ".join(columns) + + create_table(nodes[0], "test", schema, db_atomic=db_atomic) + uuid = get_table_uuid(nodes[0], db_atomic, "test") + + + dropped_keys = 0 + + for key in range(0, keys): + node = nodes[key % 2] + node.query("INSERT INTO s3.test SELECT {key}, * FROM generateRandom('{schema}') LIMIT {size}".format(key=key, schema=schema, size=size)) + if not (key % 3): + dropped_keys += 1 + node.query("ALTER TABLE s3.test DROP 
PARTITION '{key}'".format(key=key)) + + for key in range(0, keys): + if not ((key+1) % 3): + dropped_keys += 1 + node.query("ALTER TABLE s3.test DROP PARTITION '{key}'".format(key=key)) + + nodes[0].query("SYSTEM SYNC REPLICA s3.test") + nodes[1].query("SYSTEM SYNC REPLICA s3.test") + + # To ensure parts have merged + nodes[0].query("OPTIMIZE TABLE s3.test") + nodes[1].query("OPTIMIZE TABLE s3.test") + + assert nodes[0].query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(size * (keys - dropped_keys)) + assert nodes[1].query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(size * (keys - dropped_keys)) + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_restore_file(node_another_bucket, bucket="root") + node_another_bucket.query("SYSTEM RESTART DISK s3") + create_table(node_another_bucket, "test", schema, attach=True, db_atomic=db_atomic, uuid=uuid) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(size * (keys - dropped_keys)) + From 8b90bd06a566a7cf846f9e26f593ed87e6811bf7 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Sat, 9 Apr 2022 15:16:58 -0400 Subject: [PATCH 003/615] Improve decompression in readbig for avoiding unnecessary memcpy --- .../CompressedReadBufferFromFile.cpp | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 9efb3c92cde..333734af3d6 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -117,6 +117,25 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) bytes_read += size_decompressed; bytes += size_decompressed; } + else if (nextimpl_working_buffer_offset > 0) + { + //Need to skip some bytes in decompressed data (seek happened before readBig call). 
+ size_compressed = new_size_compressed; + bytes += offset(); + + /// This is for clang static analyzer. + assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); + memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); + working_buffer = Buffer(memory.data(), &memory[size_decompressed]); + decompress(working_buffer, size_decompressed, size_compressed_without_checksum); + + /// Read partial data from first block. We don't use nextImpl in this method + /// Avoid to call unnecessary memcpy in read when second block fits entirely to output buffer + size_t size_partial = std::min((size_decompressed - nextimpl_working_buffer_offset),(n - bytes_read)); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + nextimpl_working_buffer_offset = 0; + bytes_read += read(to + bytes_read, size_partial); + } else { size_compressed = new_size_compressed; @@ -124,17 +143,12 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// This is for clang static analyzer. assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); - memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); - decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - /// Manually take nextimpl_working_buffer_offset into account, because we don't use - /// nextImpl in this method. - pos = working_buffer.begin() + nextimpl_working_buffer_offset; - nextimpl_working_buffer_offset = 0; - + ///Read partial data from last block. We don't use nextImpl in this method as well. 
+ pos = working_buffer.begin(); bytes_read += read(to + bytes_read, n - bytes_read); break; } From 547aeca83faf38e555f12f8d5b70b79672fa6331 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Sat, 9 Apr 2022 16:14:38 -0400 Subject: [PATCH 004/615] fixed comments --- src/Compression/CompressedReadBufferFromFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 333734af3d6..a876c77f1e8 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -119,7 +119,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) } else if (nextimpl_working_buffer_offset > 0) { - //Need to skip some bytes in decompressed data (seek happened before readBig call). + /// Need to skip some bytes in decompressed data (seek happened before readBig call). size_compressed = new_size_compressed; bytes += offset(); From c99fef5d82cc10e72790a9fa70a3f73e48b086c1 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Sat, 9 Apr 2022 16:31:24 -0400 Subject: [PATCH 005/615] Modifiy comments --- src/Compression/CompressedReadBufferFromFile.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index a876c77f1e8..9c7cd4b273a 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -129,8 +129,8 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - /// Read partial data from first block. We don't use nextImpl in this method - /// Avoid to call unnecessary memcpy in read when second block fits entirely to output buffer + /// Read partial data from first block. 
+ /// Avoid to call unnecessary nextImpl+memcpy in read when second block fits entirely to output buffer size_t size_partial = std::min((size_decompressed - nextimpl_working_buffer_offset),(n - bytes_read)); pos = working_buffer.begin() + nextimpl_working_buffer_offset; nextimpl_working_buffer_offset = 0; @@ -147,7 +147,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - ///Read partial data from last block. We don't use nextImpl in this method as well. + ///Read partial data from last block. pos = working_buffer.begin(); bytes_read += read(to + bytes_read, n - bytes_read); break; From 0875fd087fc9f60c5d83d7eea2f3322b732b7ead Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Sat, 9 Apr 2022 17:53:08 -0400 Subject: [PATCH 006/615] Modify comments --- src/Compression/CompressedReadBufferFromFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 9c7cd4b273a..8845c9816a4 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -130,7 +130,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) decompress(working_buffer, size_decompressed, size_compressed_without_checksum); /// Read partial data from first block. 
- /// Avoid to call unnecessary nextImpl+memcpy in read when second block fits entirely to output buffer + /// Avoid to call nextImpl and unnecessary memcpy in read when the second block fits entirely to output buffer size_t size_partial = std::min((size_decompressed - nextimpl_working_buffer_offset),(n - bytes_read)); pos = working_buffer.begin() + nextimpl_working_buffer_offset; nextimpl_working_buffer_offset = 0; From d21d6befc1618c84e60255833e2fee6f7fb3d4b8 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Mon, 11 Apr 2022 05:41:40 -0400 Subject: [PATCH 007/615] Modify comments --- src/Compression/CompressedReadBufferFromFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 8845c9816a4..05fe7ee56c9 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -129,7 +129,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - /// Read partial data from first block. + /// Read partial data from first block. 
Won't run here at second block /// Avoid to call nextImpl and unnecessary memcpy in read when the second block fits entirely to output buffer size_t size_partial = std::min((size_decompressed - nextimpl_working_buffer_offset),(n - bytes_read)); pos = working_buffer.begin() + nextimpl_working_buffer_offset; From 62341229321b83b1755018b89c03c736a5400cae Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Mon, 11 Apr 2022 06:02:39 -0400 Subject: [PATCH 008/615] Modify comment --- src/Compression/CompressedReadBufferFromFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 05fe7ee56c9..dc8bba511f9 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -129,8 +129,8 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) working_buffer = Buffer(memory.data(), &memory[size_decompressed]); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); - /// Read partial data from first block. Won't run here at second block - /// Avoid to call nextImpl and unnecessary memcpy in read when the second block fits entirely to output buffer + /// Read partial data from first block. Won't run here at second block. + /// Avoid to call nextImpl and unnecessary memcpy in read when the second block fits entirely to output buffer. 
size_t size_partial = std::min((size_decompressed - nextimpl_working_buffer_offset),(n - bytes_read)); pos = working_buffer.begin() + nextimpl_working_buffer_offset; nextimpl_working_buffer_offset = 0; From ba112c230275e202c97598e43b1e976c03d3da97 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Mon, 11 Apr 2022 06:41:55 -0400 Subject: [PATCH 009/615] Fixed style issue --- src/Backups/registerBackupEnginesFileAndDisk.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index e3b06a21d96..fa1786c6350 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -53,7 +53,8 @@ namespace } /// Checks that a path specified as parameters of File() is valid. - void checkPath(fs::path & path, const Poco::Util::AbstractConfiguration & config, const fs::path & data_dir) { + void checkPath(fs::path & path, const Poco::Util::AbstractConfiguration & config, const fs::path & data_dir) + { path = path.lexically_normal(); if (path.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to backup must not be empty"); From e704e8d5d70824d7c83921ed36b5eb0e73253828 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:09:20 +0200 Subject: [PATCH 010/615] Allow to drop privileges at startup --- programs/install/Install.cpp | 18 +------- programs/main.cpp | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 16 deletions(-) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 5dec09ea901..118843eadd0 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -925,22 +925,8 @@ namespace if (!user.empty()) { -#if defined(OS_FREEBSD) - command = fmt::format("su -m '{}' -c '{}'", user, command); -#else - bool may_need_sudo = geteuid() != 0; - if (may_need_sudo) - { - struct passwd *p = getpwuid(geteuid()); - // Only use sudo 
when we are not the given user - if (p == nullptr || std::string(p->pw_name) != user) - command = fmt::format("sudo -u '{}' {}", user, command); - } - else - { - command = fmt::format("su -s /bin/sh '{}' -c '{}'", user, command); - } -#endif + if (0 != setenv("CLICKHOUSE_SETUID", user.c_str(), true)) + throwFromErrno("Cannot set environment variable CLICKHOUSE_SETUID that is required to dropping privileges", ErrorCodes::SYSTEM_ERROR); } fmt::print("Will run {}\n", command); diff --git a/programs/main.cpp b/programs/main.cpp index 2cdda075ca7..6913a709612 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -6,6 +6,10 @@ #include #endif +#include +#include +#include + #include #include #include @@ -18,11 +22,21 @@ #include #include #include +#include #include #include +namespace DB +{ + namespace ErrorCodes + { + extern const int SYSTEM_ERROR; + extern const int BAD_ARGUMENTS; + } +} + /// Universal executable for various clickhouse applications #if ENABLE_CLICKHOUSE_SERVER int mainEntryClickHouseServer(int argc, char ** argv); @@ -332,6 +346,68 @@ struct Checker #endif ; + +/// ClickHouse can drop privileges at startup. It is controlled by environment variables. +void setUserAndGroup() +{ + using namespace DB; + + static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw. + std::unique_ptr buf(new char[buf_size]); + + const char * env_uid = getenv("CLICKHOUSE_SETUID"); + if (env_uid && env_uid[0]) + { + /// Is it numeric id or name? 
+ uid_t uid = 0; + if (!tryParse(uid, env_uid)) + { + passwd entry{}; + passwd * result{}; + + if (0 != getpwnam_r(env_uid, &entry, buf, buf_size, &result)) + throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); + + if (!result) + throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + + uid = entry.pw_uid; + } + + if (uid == 0) + throw Exception("User specified in the CLICKHOUSE_SETUID environment variable has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + + if (0 != setuid(uid)) + throwFromErrno(fmt::format("Cannot do 'setuid' to user, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); + } + + const char * env_gid = getenv("CLICKHOUSE_SETGID"); + if (env_gid && env_gid[0]) + { + gid_t gid = 0; + if (!tryParse(gid, env_gid)) + { + std::vector buf(buf_size); + group entry{}; + group * result{}; + + if (0 != getgrnam_r(env_gid, &entry, buf, buf_size, &result)) + throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); + + if (!result) + throw Exception("Group {} specified in the CLICKHOUSE_SETGID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + + gid = entry.gr_gid; + } + + if (gid == 0) + throw Exception("Group specified in the CLICKHOUSE_SETGID environment variable has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + + if (0 != setgid(gid)) + throwFromErrno(fmt::format("Cannot do 'setgid' to user, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); + } +} + } @@ -352,6 +428,9 @@ int main(int argc_, char ** argv_) 
inside_main = true; SCOPE_EXIT({ inside_main = false; }); + /// Drop privileges if needed. + setUserAndGroup(); + /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. std::set_new_handler(nullptr); From dda060063be9d2d473d6b6d814be2eabbe1fa6c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:14:27 +0200 Subject: [PATCH 011/615] Remove trash --- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 10 +-- docker/server/entrypoint.sh | 30 ++----- docker/server/su-exec.c | 138 -------------------------------- 4 files changed, 8 insertions(+), 172 deletions(-) delete mode 100644 docker/server/su-exec.c diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5aaf5dd5511..b01dba1e22f 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -63,7 +63,7 @@ RUN arch=${TARGETARCH:-amd64} \ && chown clickhouse:clickhouse /var/lib/clickhouse \ && chown root:clickhouse /var/log/clickhouse-server \ && chmod +x /entrypoint.sh \ - && apk add --no-cache su-exec bash tzdata \ + && apk add --no-cache bash tzdata \ && cp /usr/share/zoneinfo/UTC /etc/localtime \ && echo "UTC" > /etc/timezone \ && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 6e93bd97036..3931974e938 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -3,8 +3,6 @@ FROM ubuntu:20.04 # see https://github.com/moby/moby/issues/4032#issuecomment-192327844 ARG DEBIAN_FRONTEND=noninteractive -COPY su-exec.c /su-exec.c - # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \ @@ -18,13 +16,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list 
gnupg \ locales \ wget \ - tzdata \ - && apt-get install -y --no-install-recommends tcc libc-dev && \ - tcc /su-exec.c -o /bin/su-exec && \ - chown root:root /bin/su-exec && \ - chmod 0755 /bin/su-exec && \ - rm /su-exec.c && \ - apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \ + tzdata && apt-get clean ARG REPO_CHANNEL="stable" diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 20ad0e03bfe..84a9ede02fb 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -15,29 +15,15 @@ CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" if [ "$(id -u)" = "0" ]; then USER=$CLICKHOUSE_UID GROUP=$CLICKHOUSE_GID - if command -v gosu &> /dev/null; then - gosu="gosu $USER:$GROUP" - elif command -v su-exec &> /dev/null; then - gosu="su-exec $USER:$GROUP" - else - echo "No gosu/su-exec detected!" - exit 1 - fi else USER="$(id -u)" GROUP="$(id -g)" - gosu="" DO_CHOWN=0 fi # set some vars CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" -if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then - echo "Configuration file '$CLICKHOUSE_CONFIG' isn't readable by user with id '$USER'" - exit 1 -fi - # get CH directories locations DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)" TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)" @@ -65,12 +51,7 @@ do # check if variable not empty [ -z "$dir" ] && continue # ensure directories exist - if [ "$DO_CHOWN" = "1" ]; then - mkdir="mkdir" - else - mkdir="$gosu mkdir" - fi - if ! $mkdir -p "$dir"; then + if ! mkdir -p "$dir"; then echo "Couldn't create necessary directory: $dir" exit 1 fi @@ -81,9 +62,6 @@ do if [ "$(stat -c %u "$dir")" != "$USER" ] || [ "$(stat -c %g "$dir")" != "$GROUP" ]; then chown -R "$USER:$GROUP" "$dir" fi - elif ! 
$gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then - echo "Necessary directory '$dir' isn't accessible by user with id '$USER'" - exit 1 fi done @@ -116,8 +94,12 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # port is needed to check if clickhouse-server is ready for connections HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)" + # Drop privileges + CLICKHOUSE_UID="${USER}" + CLICKHOUSE_GID="${GROUP}" + # Listen only on localhost until the initialization is done - $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & + /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & pid="$!" # check if clickhouse is ready to accept connections diff --git a/docker/server/su-exec.c b/docker/server/su-exec.c deleted file mode 100644 index a375e704f55..00000000000 --- a/docker/server/su-exec.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - -https://github.com/ncopa/su-exec -The file is copy-pasted verbatim to avoid supply chain attacks. - -The MIT License (MIT) - -Copyright (c) 2015 ncopa - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -*/ - -/* set user and group id and exec */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -static char *argv0; - -static void usage(int exitcode) -{ - printf("Usage: %s user-spec command [args]\n", argv0); - exit(exitcode); -} - -int main(int argc, char *argv[]) -{ - char *user, *group, **cmdargv; - char *end; - - uid_t uid = getuid(); - gid_t gid = getgid(); - - argv0 = argv[0]; - if (argc < 3) - usage(0); - - user = argv[1]; - group = strchr(user, ':'); - if (group) - *group++ = '\0'; - - cmdargv = &argv[2]; - - struct passwd *pw = NULL; - if (user[0] != '\0') { - uid_t nuid = strtol(user, &end, 10); - if (*end == '\0') - uid = nuid; - else { - pw = getpwnam(user); - if (pw == NULL) - err(1, "getpwnam(%s)", user); - } - } - if (pw == NULL) { - pw = getpwuid(uid); - } - if (pw != NULL) { - uid = pw->pw_uid; - gid = pw->pw_gid; - } - - setenv("HOME", pw != NULL ? 
pw->pw_dir : "/", 1); - - if (group && group[0] != '\0') { - /* group was specified, ignore grouplist for setgroups later */ - pw = NULL; - - gid_t ngid = strtol(group, &end, 10); - if (*end == '\0') - gid = ngid; - else { - struct group *gr = getgrnam(group); - if (gr == NULL) - err(1, "getgrnam(%s)", group); - gid = gr->gr_gid; - } - } - - if (pw == NULL) { - if (setgroups(1, &gid) < 0) - err(1, "setgroups(%i)", gid); - } else { - int ngroups = 0; - gid_t *glist = NULL; - - while (1) { - int r = getgrouplist(pw->pw_name, gid, glist, &ngroups); - - if (r >= 0) { - if (setgroups(ngroups, glist) < 0) - err(1, "setgroups"); - break; - } - - glist = realloc(glist, ngroups * sizeof(gid_t)); - if (glist == NULL) - err(1, "malloc"); - } - } - - if (setgid(gid) < 0) - err(1, "setgid(%i)", gid); - - if (setuid(uid) < 0) - err(1, "setuid(%i)", uid); - - execvp(cmdargv[0], cmdargv); - err(1, "%s", cmdargv[0]); - - return 1; -} From 83de3bb1de328dbba720e9d86bd350b5a0658fd2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:19:36 +0200 Subject: [PATCH 012/615] Allow to drop privileges at startup --- programs/main.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/programs/main.cpp b/programs/main.cpp index 6913a709612..242b86289f4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -93,8 +93,6 @@ int mainEntryClickHouseHashBinary(int, char **) return 0; } -#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) - namespace { @@ -201,7 +199,7 @@ auto instructionFailToString(InstructionFail fail) { switch (fail) { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, ARRAY_SIZE(x) - 1) +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) case InstructionFail::NONE: ret("NONE"); case InstructionFail::SSE3: @@ -289,7 +287,7 @@ void checkRequiredInstructionsImpl(volatile InstructionFail & fail) #define writeError(data) do \ { \ static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, 
data, ARRAY_SIZE(data) - 1)) \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ _Exit(1); \ } while (false) @@ -365,7 +363,7 @@ void setUserAndGroup() passwd entry{}; passwd * result{}; - if (0 != getpwnam_r(env_uid, &entry, buf, buf_size, &result)) + if (0 != getpwnam_r(env_uid, &entry, buf.get(), buf_size, &result)) throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); if (!result) @@ -387,11 +385,10 @@ void setUserAndGroup() gid_t gid = 0; if (!tryParse(gid, env_gid)) { - std::vector buf(buf_size); group entry{}; group * result{}; - if (0 != getgrnam_r(env_gid, &entry, buf, buf_size, &result)) + if (0 != getgrnam_r(env_gid, &entry, buf.get(), buf_size, &result)) throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); if (!result) From b7e5a81215311c22b16727c36b6ecc6e6dd13af7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:24:47 +0200 Subject: [PATCH 013/615] Allow to drop privileges at startup --- docker/server/entrypoint.sh | 4 ++-- programs/main.cpp | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 84a9ede02fb..9d337e53a68 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -95,8 +95,8 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)" # Drop privileges - CLICKHOUSE_UID="${USER}" - CLICKHOUSE_GID="${GROUP}" + CLICKHOUSE_SETUID="${USER}" + CLICKHOUSE_SETGID="${GROUP}" # Listen only on localhost until the initialization is done /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & diff --git 
a/programs/main.cpp b/programs/main.cpp index 242b86289f4..45b74d38217 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -358,7 +358,7 @@ void setUserAndGroup() { /// Is it numeric id or name? uid_t uid = 0; - if (!tryParse(uid, env_uid)) + if (!tryParse(uid, env_uid) || uid == 0) { passwd entry{}; passwd * result{}; @@ -383,7 +383,7 @@ void setUserAndGroup() if (env_gid && env_gid[0]) { gid_t gid = 0; - if (!tryParse(gid, env_gid)) + if (!tryParse(gid, env_gid) || gid == 0) { group entry{}; group * result{}; @@ -426,7 +426,15 @@ int main(int argc_, char ** argv_) SCOPE_EXIT({ inside_main = false; }); /// Drop privileges if needed. - setUserAndGroup(); + try + { + setUserAndGroup(); + } + catch (...) + { + std::cerr << DB::getCurrentExceptionMessage() << '\n'; + return 1; + } /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. From 803edc387979c8f83df38dd19b82a02ed715797e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:25:55 +0200 Subject: [PATCH 014/615] Allow to drop privileges at startup --- programs/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/main.cpp b/programs/main.cpp index 45b74d38217..f0e748bf440 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -432,7 +432,7 @@ int main(int argc_, char ** argv_) } catch (...) 
{ - std::cerr << DB::getCurrentExceptionMessage() << '\n'; + std::cerr << DB::getCurrentExceptionMessage("setUserAndGroup") << '\n'; return 1; } From 2b67d9919305d57e84f71972278b3797939767ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:26:53 +0200 Subject: [PATCH 015/615] Allow to drop privileges at startup --- programs/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/main.cpp b/programs/main.cpp index f0e748bf440..531831d8bc6 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -432,7 +432,7 @@ int main(int argc_, char ** argv_) } catch (...) { - std::cerr << DB::getCurrentExceptionMessage("setUserAndGroup") << '\n'; + std::cerr << DB::getCurrentExceptionMessage("setUserAndGroup", false) << '\n'; return 1; } From e0121dfe6f2f037114791007f8981e54eb8057e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Apr 2022 00:33:58 +0200 Subject: [PATCH 016/615] Remove old comment --- docker/server/Dockerfile.ubuntu | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 3931974e938..d5f1a3929b7 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -43,21 +43,6 @@ ARG single_binary_location_url="" # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu. -# To drop privileges, we need 'su' command, that simply changes uid and gid. -# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux: -# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking -# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating it's own pseudo-terminal -# and forwarding commands. 
Due to some ridiculous curcumstances, it does not work in Docker (or it does) -# and for these reasons people are using alternatives to the 'su' command in Docker, -# that don't mess with the terminal, don't care about closing the opened files, etc... -# but can only be safe to drop privileges inside Docker. -# The question - what implementation of 'su' command to use. -# It should be a simple script doing about just two syscalls. -# Some people tend to use 'gosu' tool that is written in Go. -# It is not used for several reasons: -# 1. Dependency on some foreign code in yet another programming language - does not sound alright. -# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners. - ARG TARGETARCH RUN arch=${TARGETARCH:-amd64} \ From 6ce66e6d13a6d31d9fcf4dd07a1d855aec775e14 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 09:59:09 -0700 Subject: [PATCH 017/615] add func h3GetUnidirectionalEdge --- src/Functions/h3GetUnidirectionalEdge.cpp | 114 ++++++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 116 insertions(+) create mode 100644 src/Functions/h3GetUnidirectionalEdge.cpp diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp new file mode 100644 index 00000000000..4981e973e36 --- /dev/null +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -0,0 +1,114 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3GetUnidirectionalEdge : public IFunction +{ +public: + static constexpr auto name = "h3GetUnidirectionalEdge"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + 
size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 2, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_origin = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_origin) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_origin = col_hindex_origin->getData(); + + const auto * col_hindex_dest = checkAndGetColumn(non_const_arguments[1].column.get()); + if (!col_hindex_dest) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64.", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_hindex_dest = col_hindex_dest->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 origin = data_hindex_origin[row]; + const UInt64 dest = data_hindex_dest[row]; + const UInt64 res = cellsToDirectedEdge(origin, dest); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3GetUnidirectionalEdge(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 9cbe1ed96cf..69956628f08 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -57,6 +57,7 @@ void registerFunctionH3PointDistKm(FunctionFactory &); void registerFunctionH3PointDistRads(FunctionFactory &); void registerFunctionH3GetRes0Indexes(FunctionFactory &); void registerFunctionH3GetPentagonIndexes(FunctionFactory &); +void registerFunctionH3GetUnidirectionalEdge(FunctionFactory &); #endif @@ -128,6 +129,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3PointDistRads(factory); registerFunctionH3GetRes0Indexes(factory); registerFunctionH3GetPentagonIndexes(factory); + registerFunctionH3GetUnidirectionalEdge(factory); #endif #if USE_S2_GEOMETRY From 12f74bc124108f3d3141d384987963e78a2430ea Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 10:47:35 -0700 Subject: [PATCH 018/615] add tests for h3GetunidirectionalEdge --- .../0_stateless/02292_h3_get_unidirectional_edge.reference | 2 ++ .../queries/0_stateless/02292_h3_get_unidirectional_edge.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference create mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql diff 
--git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference new file mode 100644 index 00000000000..5aea166fbd0 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference @@ -0,0 +1,2 @@ +1248204388774707199 +0 diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql new file mode 100644 index 00000000000..ddab17e47e9 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283477fffffff')); +select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283473fffffff')); From 925afc8f19d00324c2a11ce6e0ee679f77b45248 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 10:47:56 -0700 Subject: [PATCH 019/615] docs for h3GetUnidirectionalEdge --- docs/en/sql-reference/functions/geo/h3.md | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 50115dd4d75..da98eeedc4d 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1026,4 +1026,41 @@ Result: │ 41162 │ └─────────────┘ ``` + +## h3GetUnidirectionalEdge {#h3getunidirectionaledge} + +Returns a unidirectional edge H3 index based on the provided origin and destination. Returns 0 on error. + +**Syntax** + +``` sql +h3GetUnidirectionalEdge(originIndex, destinationIndex) +``` + +**Parameters** + +- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Unidirectional Edge Hexagon Index number. 
+ +Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql + SELECT h3GetUnidirectionalEdge(599686042433355775, 599686043507097599) as edge; +``` + +Result: + +``` text +┌────────────────edge─┐ +│ 1248204388774707199 │ +└─────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From 0e4a8337172421065795f2a8677ee65c949a6240 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 11:18:41 -0700 Subject: [PATCH 020/615] add h3UnidirectionalEdgeIsValid func --- src/Functions/h3UnidirectionalEdgeIsValid.cpp | 95 +++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 97 insertions(+) create mode 100644 src/Functions/h3UnidirectionalEdgeIsValid.cpp diff --git a/src/Functions/h3UnidirectionalEdgeIsValid.cpp b/src/Functions/h3UnidirectionalEdgeIsValid.cpp new file mode 100644 index 00000000000..6f517d25985 --- /dev/null +++ b/src/Functions/h3UnidirectionalEdgeIsValid.cpp @@ -0,0 +1,95 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3UnidirectionalEdgeIsValid : public IFunction +{ +public: + static constexpr auto name = "h3UnidirectionalEdgeIsValid"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if 
(!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_edge = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_edge) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_edge = col_hindex_edge->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 edge = data_hindex_edge[row]; + const UInt8 res = isValidDirectedEdge(edge); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3UnidirectionalEdgeIsValid(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 69956628f08..1485780c491 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -58,6 +58,7 @@ void registerFunctionH3PointDistRads(FunctionFactory &); void registerFunctionH3GetRes0Indexes(FunctionFactory &); void registerFunctionH3GetPentagonIndexes(FunctionFactory &); void registerFunctionH3GetUnidirectionalEdge(FunctionFactory &); +void registerFunctionH3UnidirectionalEdgeIsValid(FunctionFactory &); #endif @@ -130,6 +131,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3GetRes0Indexes(factory); 
registerFunctionH3GetPentagonIndexes(factory); registerFunctionH3GetUnidirectionalEdge(factory); + registerFunctionH3UnidirectionalEdgeIsValid(factory); #endif #if USE_S2_GEOMETRY From 276b39b9ac08049cc5e9db1762d8f85ff4d4c0bb Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 13:21:42 -0700 Subject: [PATCH 021/615] add test h3UnidirectionalEdgeIsValid --- .../0_stateless/02292_h3_is_unidirectional_edge.reference | 2 ++ tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference create mode 100644 tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql diff --git a/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql new file mode 100644 index 00000000000..f64ae292221 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +SELECT h3UnidirectionalEdgeIsValid(1248204388774707199) as edge; +SELECT h3UnidirectionalEdgeIsValid(1248204388774707197) as edge; From 59e1f252fd7cfb272dc6fe7e3b727e2e5d85b399 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 13:27:48 -0700 Subject: [PATCH 022/615] docs for h3UnidirectionalEdgeIsValid func --- docs/en/sql-reference/functions/geo/h3.md | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index da98eeedc4d..cc34c916187 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ 
-1063,4 +1063,41 @@ Result: │ 1248204388774707199 │ └─────────────────────┘ ``` + +## h3UnidirectionalEdgeIsValid {#h3unidirectionaledgeisvalid} + +Determines if the provided H3Index is a valid unidirectional edge index. Returns 1 if it's a unidirectional edge and 0 otherwise. + +**Syntax** + +``` sql +h3UnidirectionalEdgeIsValid(index) +``` + +**Parameter** + +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- 1 — The H3 index is a valid unidirectional edge. +- 0 — The H3 index is not a valid unidirectional edge. + +Type: [UInt8](../../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql + SELECT h3UnidirectionalEdgeIsValid(1248204388774707199) as validOrNot; +``` + +Result: + +``` text +┌─validOrNot─┐ +│ 1 │ +└────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From 7e871adf91c5acc53966ee0998eba48e3afeacce Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 13:47:43 -0700 Subject: [PATCH 023/615] add h3GetOriginIndexFromUnidirectionalEdge func --- ...h3GetOriginIndexFromUnidirectionalEdge.cpp | 95 +++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 97 insertions(+) create mode 100644 src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp diff --git a/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp b/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp new file mode 100644 index 00000000000..acf94fcf95f --- /dev/null +++ b/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp @@ -0,0 +1,95 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3GetOriginIndexFromUnidirectionalEdge : public IFunction 
+{ +public: + static constexpr auto name = "h3GetOriginIndexFromUnidirectionalEdge"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_edge = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_edge) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_edge = col_hindex_edge->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 edge = data_hindex_edge[row]; + const UInt64 res = getDirectedEdgeOrigin(edge); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3GetOriginIndexFromUnidirectionalEdge(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 1485780c491..5c8445f3880 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -59,6 +59,7 @@ void registerFunctionH3GetRes0Indexes(FunctionFactory &); void registerFunctionH3GetPentagonIndexes(FunctionFactory &); void registerFunctionH3GetUnidirectionalEdge(FunctionFactory &); void registerFunctionH3UnidirectionalEdgeIsValid(FunctionFactory &); +void registerFunctionH3GetOriginIndexFromUnidirectionalEdge(FunctionFactory &); #endif @@ -132,6 +133,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3GetPentagonIndexes(factory); registerFunctionH3GetUnidirectionalEdge(factory); registerFunctionH3UnidirectionalEdgeIsValid(factory); + registerFunctionH3GetOriginIndexFromUnidirectionalEdge(factory); #endif #if USE_S2_GEOMETRY From 7cb7d142a9292924d30a5be1b9d81ac0f588239c Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 13:48:22 -0700 Subject: [PATCH 024/615] add tests h3GetOriginIndexFromUnidirectionalEdge --- ...92_h3_get_origin_index_from_unidirectional_edge.reference | 3 +++ .../02292_h3_get_origin_index_from_unidirectional_edge.sql | 5 +++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference create mode 100644 
tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql diff --git a/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference new file mode 100644 index 00000000000..1c42d8408b9 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference @@ -0,0 +1,3 @@ +599686042433355775 +599686042433355773 +0 diff --git a/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql new file mode 100644 index 00000000000..370ccca2e44 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest + +SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707199); +SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707197); +SELECT h3GetOriginIndexFromUnidirectionalEdge(599686042433355775); From 9ec187ef221b6057884ab2a7a90f12da2a81ec86 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 13:53:44 -0700 Subject: [PATCH 025/615] docs for h3GetOriginIndexFromUnidirectionalEdge --- docs/en/sql-reference/functions/geo/h3.md | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index cc34c916187..5a0d80dd267 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1100,4 +1100,40 @@ Result: │ 1 │ └────────────┘ ``` + +## h3GetOriginIndexFromUnidirectionalEdge {#h3getoriginindexfromunidirectionaledge} + +Returns the origin hexagon index from the unidirectional edge H3Index. + +**Syntax** + +``` sql +h3GetOriginIndexFromUnidirectionalEdge(edge) +``` + +**Parameter** + +- `edge` — Hexagon index number that represents a unidirectional edge. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Origin Hexagon Index number. + +Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql + SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707197) as origin; +``` + +Result: + +``` text +┌─────────────origin─┐ +│ 599686042433355773 │ +└────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From 77b5f6fee05d14c1a2327ce2477c334712102cf0 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 14:06:45 -0700 Subject: [PATCH 026/615] add h3GetDestinationIndexFromUnidirectionalEdge func --- ...DestinationIndexFromUnidirectionalEdge.cpp | 95 +++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 97 insertions(+) create mode 100644 src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp diff --git a/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp b/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp new file mode 100644 index 00000000000..4a4a36076b2 --- /dev/null +++ b/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp @@ -0,0 +1,95 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3GetDestinationIndexFromUnidirectionalEdge : public IFunction +{ +public: + static constexpr auto name = "h3GetDestinationIndexFromUnidirectionalEdge"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool 
isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_edge = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_edge) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_edge = col_hindex_edge->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 edge = data_hindex_edge[row]; + const UInt64 res = getDirectedEdgeDestination(edge); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 5c8445f3880..62f2aecd645 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -60,6 +60,7 @@ void registerFunctionH3GetPentagonIndexes(FunctionFactory &); void registerFunctionH3GetUnidirectionalEdge(FunctionFactory &); void 
registerFunctionH3UnidirectionalEdgeIsValid(FunctionFactory &); void registerFunctionH3GetOriginIndexFromUnidirectionalEdge(FunctionFactory &); +void registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(FunctionFactory &); #endif @@ -134,6 +135,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3GetUnidirectionalEdge(factory); registerFunctionH3UnidirectionalEdgeIsValid(factory); registerFunctionH3GetOriginIndexFromUnidirectionalEdge(factory); + registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(factory); #endif #if USE_S2_GEOMETRY From 7e96156bcca4760b871c09f910a81f9f4bdc2741 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 14:09:31 -0700 Subject: [PATCH 027/615] tests for h3GetDestinationIndexFromUnidirectionalEdge --- ...02292_h3_get_dest_index_from_unidirectional_edge.reference | 2 ++ .../02292_h3_get_dest_index_from_unidirectional_edge.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference create mode 100644 tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql diff --git a/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference new file mode 100644 index 00000000000..02c4e6f71f6 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference @@ -0,0 +1,2 @@ +599686043507097597 +0 diff --git a/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql new file mode 100644 index 00000000000..d8dd57508df --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +SELECT h3GetDestinationIndexFromUnidirectionalEdge(1248204388774707197); +SELECT 
h3GetDestinationIndexFromUnidirectionalEdge(599686042433355773); From b5a914ed0427016d63c9e9b4f98e954bc206d4cb Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 14:11:53 -0700 Subject: [PATCH 028/615] docs for h3GetDestinationIndexFromUnidirectionalEdge --- docs/en/sql-reference/functions/geo/h3.md | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 5a0d80dd267..b28066017d8 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1136,4 +1136,40 @@ Result: │ 599686042433355773 │ └────────────────────┘ ``` + +## h3GetDestinationIndexFromUnidirectionalEdge {#h3getdestinationindexfromunidirectionaledge} + +Returns the destination hexagon index from the unidirectional edge H3Index. + +**Syntax** + +``` sql +h3GetDestinationIndexFromUnidirectionalEdge(edge) +``` + +**Parameter** + +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Destination Hexagon Index number. + +Type: [UInt64](../../../sql-reference/data-types/int-uint.md). 
+ +**Example** + +Query: + +``` sql + SELECT h3GetDestinationIndexFromUnidirectionalEdge(1248204388774707197) as destination; +``` + +Result: + +``` text +┌────────destination─┐ +│ 599686043507097597 │ +└────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From 30d14c1217ea6e28bc64e79921fdd03062685d92 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 21:26:44 -0700 Subject: [PATCH 029/615] add h3GetIndexesFromUnidirectionalEdge func --- .../h3GetIndexesFromUnidirectionalEdge.cpp | 110 ++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 112 insertions(+) create mode 100644 src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp diff --git a/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp b/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp new file mode 100644 index 00000000000..e5f72deedef --- /dev/null +++ b/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp @@ -0,0 +1,110 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3GetIndexesFromUnidirectionalEdge : public IFunction +{ +public: + static constexpr auto name = "h3GetIndexesFromUnidirectionalEdge"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) 
+ throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"origin", "destination"}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_edge = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_edge) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_edge = col_hindex_edge->getData(); + + auto origin = ColumnUInt64::create(input_rows_count); + auto destination = ColumnUInt64::create(input_rows_count); + + ColumnUInt64::Container & origin_data = origin->getData(); + ColumnUInt64::Container & destination_data = destination->getData(); + + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 edge = data_hindex_edge[row]; + std::vector res; + // resize to 2 as directedEdgeToCells func sets the origin and + // destination at [0] and [1] of the input vector + res.resize(2); + + directedEdgeToCells(edge, res.data()); + + origin_data[row] = res[0]; + destination_data[row] = res[1]; + } + + MutableColumns columns; + columns.emplace_back(std::move(origin)); + columns.emplace_back(std::move(destination)); + + return ColumnTuple::create(std::move(columns)); + } +}; + +} + +void registerFunctionH3GetIndexesFromUnidirectionalEdge(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 62f2aecd645..faa9c706e6c 100644 --- 
a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -61,6 +61,7 @@ void registerFunctionH3GetUnidirectionalEdge(FunctionFactory &); void registerFunctionH3UnidirectionalEdgeIsValid(FunctionFactory &); void registerFunctionH3GetOriginIndexFromUnidirectionalEdge(FunctionFactory &); void registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(FunctionFactory &); +void registerFunctionH3GetIndexesFromUnidirectionalEdge(FunctionFactory &); #endif @@ -136,6 +137,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3UnidirectionalEdgeIsValid(factory); registerFunctionH3GetOriginIndexFromUnidirectionalEdge(factory); registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(factory); + registerFunctionH3GetIndexesFromUnidirectionalEdge(factory); #endif #if USE_S2_GEOMETRY From 853811c94aef59c8d791023cdf87a8df351d38ff Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 21:26:55 -0700 Subject: [PATCH 030/615] tests for h3GetIndexesFromUnidirectionalEdge --- .../02292_h3_get_indexes_from_unidirectional_edge.reference | 2 ++ .../02292_h3_get_indexes_from_unidirectional_edge.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference create mode 100644 tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql diff --git a/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference new file mode 100644 index 00000000000..05e72bbe3ee --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference @@ -0,0 +1,2 @@ +(599686042433355775,599686043507097599) +(0,0) diff --git a/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql new file mode 100644 index 
00000000000..5d784291ef8 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +SELECT h3GetIndexesFromUnidirectionalEdge(1248204388774707199); +SELECT h3GetIndexesFromUnidirectionalEdge(599686042433355775); \ No newline at end of file From 03db6738bb8ac1f2bd83a719f0fe332529e49a0b Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 21:34:32 -0700 Subject: [PATCH 031/615] docs for h3GetIndexesFromUnidirectionalEdge --- docs/en/sql-reference/functions/geo/h3.md | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index b28066017d8..924b1893059 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1172,4 +1172,43 @@ Result: │ 599686043507097597 │ └────────────────────┘ ``` + +## h3GetIndexesFromUnidirectionalEdge {#h3getindexesfromunidirectionaledge} + +Returns the origin and destination hexagon indexes from the given unidirectional edge H3Index. + +**Syntax** + +``` sql +h3GetIndexesFromUnidirectionalEdge(edge) +``` + +**Parameter** + +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +A tuple consisting of two values `tuple(origin,destination)`: + +- `origin` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `destination` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). + +Returns `(0,0)` if the provided input is not valid. 
+ +**Example** + +Query: + +``` sql + SELECT h3GetIndexesFromUnidirectionalEdge(1248204388774707199) as indexes; +``` + +Result: + +``` text +┌─indexes─────────────────────────────────┐ +│ (599686042433355775,599686043507097599) │ +└─────────────────────────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From 745a44a7b0b90782d2e9bf4dfd06de5922997d1a Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 22:25:58 -0700 Subject: [PATCH 032/615] add h3GetUnidirectionalEdgesFromHexagon func --- .../h3GetUnidirectionalEdgesFromHexagon.cpp | 113 ++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 115 insertions(+) create mode 100644 src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp diff --git a/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp b/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp new file mode 100644 index 00000000000..1fca0353b62 --- /dev/null +++ b/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp @@ -0,0 +1,113 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3GetUnidirectionalEdgesFromHexagon : public IFunction +{ +public: + static constexpr auto name = "h3GetUnidirectionalEdgesFromHexagon"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const 
auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_edge = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_edge) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_edge = col_hindex_edge->getData(); + + auto result_column_data = ColumnUInt64::create(); + auto & result_data = result_column_data->getData(); + + auto result_column_offsets = ColumnArray::ColumnOffsets::create(); + auto & result_offsets = result_column_offsets->getData(); + result_offsets.resize(input_rows_count); + + auto current_offset = 0; + std::vector res_vec; + result_data.reserve(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 edge = data_hindex_edge[row]; + // originToDirectedEdges places only 6 edges into + // res_vec that's passed + res_vec.resize(6); + + originToDirectedEdges(edge, res_vec.data()); + + for (auto & i : res_vec) + { + ++current_offset; + result_data.emplace_back(i); + } + + result_offsets[row] = current_offset; + res_vec.clear(); + } + return ColumnArray::create(std::move(result_column_data), std::move(result_column_offsets)); + } +}; + +} + +void registerFunctionH3GetUnidirectionalEdgesFromHexagon(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp 
b/src/Functions/registerFunctionsGeo.cpp index faa9c706e6c..391ab664d52 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -62,6 +62,7 @@ void registerFunctionH3UnidirectionalEdgeIsValid(FunctionFactory &); void registerFunctionH3GetOriginIndexFromUnidirectionalEdge(FunctionFactory &); void registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(FunctionFactory &); void registerFunctionH3GetIndexesFromUnidirectionalEdge(FunctionFactory &); +void registerFunctionH3GetUnidirectionalEdgesFromHexagon(FunctionFactory &); #endif @@ -138,6 +139,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3GetOriginIndexFromUnidirectionalEdge(factory); registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(factory); registerFunctionH3GetIndexesFromUnidirectionalEdge(factory); + registerFunctionH3GetUnidirectionalEdgesFromHexagon(factory); #endif #if USE_S2_GEOMETRY From 91d132d645492364f4a667e38826765cec01b084 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 22:30:47 -0700 Subject: [PATCH 033/615] tests for h3GetUnidirectionalEdgesFromHexagon --- .../02292_h3_get_unidirectional_edges_from_hexagon.reference | 2 ++ .../02292_h3_get_unidirectional_edges_from_hexagon.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference create mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference b/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference new file mode 100644 index 00000000000..767c017933e --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference @@ -0,0 +1,2 @@ +[1248204388774707199,1320261982812635135,1392319576850563071,1464377170888491007,1536434764926418943,1608492358964346879] 
+[1248204388774707197,1320261982812635133,1392319576850563069,1464377170888491005,1536434764926418941,1608492358964346877] diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql b/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql new file mode 100644 index 00000000000..5f71a85fb37 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +SELECT h3GetUnidirectionalEdgesFromHexagon(1248204388774707199); +SELECT h3GetUnidirectionalEdgesFromHexagon(599686042433355773); \ No newline at end of file From 680fb0f2659158fa689f88dd55a59449137afb84 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sun, 1 May 2022 22:38:01 -0700 Subject: [PATCH 034/615] docs for h3GetUnidirectionalEdgesFromHexagon --- docs/en/sql-reference/functions/geo/h3.md | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 924b1893059..0fd11daf6cf 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1211,4 +1211,40 @@ Result: │ (599686042433355775,599686043507097599) │ └─────────────────────────────────────────┘ ``` + +## h3GetUnidirectionalEdgesFromHexagon {#h3getunidirectionaledgesfromhexagon} + +Provides all of the unidirectional edges from the provided H3Index. + +**Syntax** + +``` sql +h3GetUnidirectionalEdgesFromHexagon(index) +``` + +**Parameter** + +- `index` — Hexagon index number from which the unidirectional edges originate. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +Array of h3 indexes representing each unidirectional edge. + +Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
+ +**Example** + +Query: + +``` sql + SELECT h3GetUnidirectionalEdgesFromHexagon(1248204388774707199) as edges; +``` + +Result: + +``` text +┌─edges─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [1248204388774707199,1320261982812635135,1392319576850563071,1464377170888491007,1536434764926418943,1608492358964346879] │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From a0da885c3cfde3b64640d4104f6cd79cd2d0e721 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Mon, 2 May 2022 21:38:54 -0700 Subject: [PATCH 035/615] add h3GetUnidirectionalEdgeBoundary func --- .../h3GetUnidirectionalEdgeBoundary.cpp | 112 ++++++++++++++++++ src/Functions/registerFunctionsGeo.cpp | 2 + 2 files changed, 114 insertions(+) create mode 100644 src/Functions/h3GetUnidirectionalEdgeBoundary.cpp diff --git a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp new file mode 100644 index 00000000000..d99b401e4e9 --- /dev/null +++ b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp @@ -0,0 +1,112 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int INCORRECT_DATA; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3GetUnidirectionalEdgeBoundary : public IFunction +{ +public: + static constexpr auto name = "h3GetUnidirectionalEdgeBoundary"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const 
override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared( + std::make_shared( + DataTypes{std::make_shared(), std::make_shared()})); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex_edge = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex_edge) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex_edge = col_hindex_edge->getData(); + + auto latitude = ColumnFloat64::create(); + auto longitude = ColumnFloat64::create(); + auto offsets = DataTypeNumber().createColumn(); + offsets->reserve(input_rows_count); + IColumn::Offset current_offset = 0; + + for (size_t row = 0; row < input_rows_count; ++row) + { + H3Index edge = data_hindex_edge[row]; + CellBoundary boundary{}; + + directedEdgeToBoundary(edge, &boundary); + + for (int vert = 0; vert < boundary.numVerts; ++vert) + { + latitude->insert(radsToDegs(boundary.verts[vert].lat)); + longitude->insert(radsToDegs(boundary.verts[vert].lng)); + } + + current_offset += boundary.numVerts; + offsets->insert(current_offset); + } + + return ColumnArray::create( + ColumnTuple::create(Columns{std::move(latitude), std::move(longitude)}), + std::move(offsets)); + } +}; + +} + +void registerFunctionH3GetUnidirectionalEdgeBoundary(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 391ab664d52..bcf18b3d128 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -63,6 +63,7 @@ void registerFunctionH3GetOriginIndexFromUnidirectionalEdge(FunctionFactory &); void registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(FunctionFactory &); void registerFunctionH3GetIndexesFromUnidirectionalEdge(FunctionFactory &); void registerFunctionH3GetUnidirectionalEdgesFromHexagon(FunctionFactory &); +void registerFunctionH3GetUnidirectionalEdgeBoundary(FunctionFactory &); #endif @@ -140,6 +141,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3GetDestinationIndexFromUnidirectionalEdge(factory); registerFunctionH3GetIndexesFromUnidirectionalEdge(factory); registerFunctionH3GetUnidirectionalEdgesFromHexagon(factory); + 
registerFunctionH3GetUnidirectionalEdgeBoundary(factory); #endif #if USE_S2_GEOMETRY From 58c471559ea247e33b2414994b025ff38b412eef Mon Sep 17 00:00:00 2001 From: bharatnc Date: Mon, 2 May 2022 21:44:17 -0700 Subject: [PATCH 036/615] tests for h3GetUnidirectionalEdgeBoundary --- .../02292_h3_get_unidirectional_edge_boundary.reference | 2 ++ .../0_stateless/02292_h3_get_unidirectional_edge_boundary.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference create mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference new file mode 100644 index 00000000000..53cf3a9de06 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference @@ -0,0 +1,2 @@ +[(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044)] +[] diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql new file mode 100644 index 00000000000..1719286ea77 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest + +SELECT h3GetUnidirectionalEdgeBoundary(1248204388774707199); +SELECT h3GetUnidirectionalEdgeBoundary(599686042433355773); \ No newline at end of file From 04913419532c4db1249fe3f3bd77a61df3057700 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Mon, 2 May 2022 21:53:39 -0700 Subject: [PATCH 037/615] docs for h3GetUnidirectionalEdgeBoundary --- docs/en/sql-reference/functions/geo/h3.md | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 0fd11daf6cf..5010ae6de2b 100644 --- 
a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1247,4 +1247,40 @@ Result: │ [1248204388774707199,1320261982812635135,1392319576850563071,1464377170888491007,1536434764926418943,1608492358964346879] │ └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + +## h3GetUnidirectionalEdgeBoundary {#h3getunidirectionaledgeboundary} + +Returns the coordinates defining the unidirectional edge. + +**Syntax** + +``` sql +h3GetUnidirectionalEdgeBoundary(index) +``` + +**Parameter** + +- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Array of pairs '(lat, lon)'. +Type: [Array](../../../sql-reference/data-types/array.md)(Tuple([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md))). + + +**Example** + +Query: + +``` sql + SELECT h3GetUnidirectionalEdgeBoundary(1248204388774707199) as boundary; +``` + +Result: + +``` text +┌─boundary────────────────────────────────────────────────────────────────────────┐ +│ [(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044)] │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) From e56f7a14514eb697ebad13163ba655486ffe305f Mon Sep 17 00:00:00 2001 From: bharatnc Date: Mon, 2 May 2022 21:53:53 -0700 Subject: [PATCH 038/615] fix style check --- src/Functions/h3GetUnidirectionalEdgeBoundary.cpp | 1 - src/Functions/h3UnidirectionalEdgeIsValid.cpp | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp index d99b401e4e9..9dd2d00609e 100644 --- a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp +++
b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp @@ -20,7 +20,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int INCORRECT_DATA; extern const int ILLEGAL_COLUMN; } diff --git a/src/Functions/h3UnidirectionalEdgeIsValid.cpp b/src/Functions/h3UnidirectionalEdgeIsValid.cpp index 6f517d25985..6b00fba3c5a 100644 --- a/src/Functions/h3UnidirectionalEdgeIsValid.cpp +++ b/src/Functions/h3UnidirectionalEdgeIsValid.cpp @@ -46,8 +46,10 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. Must be UInt64", - arg->getName(), 1, getName()); - + arg->getName(), + 1, + getName()); + return std::make_shared(); } From d899ef2e48a883dd838738e9e125b2c5f3a4d0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 3 May 2022 19:57:21 +0200 Subject: [PATCH 039/615] HTTP: Always return summary data and exception (when possible) --- .../HTTP/WriteBufferFromHTTPServerResponse.cpp | 11 ++++++++++- src/Server/HTTP/WriteBufferFromHTTPServerResponse.h | 10 ++++++++++ src/Server/HTTPHandler.cpp | 12 +++++++++--- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 07584075097..c8ae9c6e07c 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -53,11 +53,20 @@ void WriteBufferFromHTTPServerResponse::writeHeaderProgress() *response_header_ostr << "X-ClickHouse-Progress: " << progress_string_writer.str() << "\r\n" << std::flush; } +void WriteBufferFromHTTPServerResponse::writeExceptionCode() +{ + if (headers_finished_sending || !exception_code) + return; + if (response_header_ostr) + *response_header_ostr << "X-ClickHouse-Exception-Code: " << exception_code << "\r\n" << std::flush; +} + void WriteBufferFromHTTPServerResponse::finishSendHeaders() { if 
(!headers_finished_sending) { writeHeaderSummary(); + writeExceptionCode(); headers_finished_sending = true; if (!is_http_method_head) @@ -150,7 +159,7 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) accumulated_progress.incrementPiecewiseAtomically(progress); - if (progress_watch.elapsed() >= send_progress_interval_ms * 1000000) + if (send_progress && progress_watch.elapsed() >= send_progress_interval_ms * 1000000) { progress_watch.restart(); diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 9f1d3e897e3..5c4e306bccd 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -66,12 +66,17 @@ public: add_cors_header = enable_cors; } + /// Send progress + void setSendProgress(bool send_progress_) { send_progress = send_progress_; } + /// Don't send HTTP headers with progress more frequently. void setSendProgressInterval(size_t send_progress_interval_ms_) { send_progress_interval_ms = send_progress_interval_ms_; } + void setExceptionCode(int exception_code_) { exception_code = exception_code_; } + private: /// Send at least HTTP headers if no data has been sent yet. /// Use after the data has possibly been sent and no error happened (and thus you do not plan @@ -88,6 +93,8 @@ private: void writeHeaderProgress(); // Used for write the header X-ClickHouse-Summary void writeHeaderSummary(); + // Use to write the header X-ClickHouse-Exception-Code even when progress has been sent + void writeExceptionCode(); /// This method finish headers with \r\n, allowing to start to send body. void finishSendHeaders(); @@ -113,9 +120,12 @@ private: bool headers_finished_sending = false; /// If true, you could not add any headers. 
Progress accumulated_progress; + bool send_progress = false; size_t send_progress_interval_ms = 100; Stopwatch progress_watch; + int exception_code = 0; /// 0 means no exception was reported; writeExceptionCode() emits the header only for a non-zero code. + std::mutex mutex; /// progress callback could be called from different threads. }; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 0ce81ec7be4..e8e50a19ca9 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -770,6 +770,7 @@ void HTTPHandler::processQuery( if (client_supports_http_compression) used_output.out->setCompressionLevel(settings.http_zlib_compression_level); + used_output.out->setSendProgress(settings.send_progress_in_http_headers); used_output.out->setSendProgressInterval(settings.http_headers_progress_interval_ms); /// If 'http_native_compression_disable_checksumming_on_decompress' setting is turned on, @@ -802,8 +803,8 @@ void HTTPHandler::processQuery( }; /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. - if (settings.send_progress_in_http_headers) - append_callback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); }); + /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` + append_callback([&used_output](const Progress & progress) { used_output.out->onProgress(progress); }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { @@ -842,7 +843,12 @@ void HTTPHandler::trySendExceptionToClient( const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) try { - response.set("X-ClickHouse-Exception-Code", toString(exception_code)); + /// In case data has already been sent, like progress headers, try using the output buffer to + /// set the exception code since it will be able to append it if it hasn't finished writing headers + if (response.sent() && used_output.out) + used_output.out->setExceptionCode(exception_code); + else
+ response.set("X-ClickHouse-Exception-Code", toString(exception_code)); /// FIXME: make sure that no one else is reading from the same stream at the moment. From 6919b459ee3bb10e5eb6261b03fc8a123aa0b6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 4 May 2022 14:24:34 +0200 Subject: [PATCH 040/615] HTTP: Return a 408 on query timeout --- src/Server/HTTPHandler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index e8e50a19ca9..8f3a2b1376d 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -103,6 +103,8 @@ namespace ErrorCodes extern const int INVALID_SESSION_TIMEOUT; extern const int HTTP_LENGTH_REQUIRED; extern const int SUPPORT_IS_DISABLED; + + extern const int TIMEOUT_EXCEEDED; } namespace @@ -228,6 +230,10 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti { return HTTPResponse::HTTP_LENGTH_REQUIRED; } + else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) + { + return HTTPResponse::HTTP_REQUEST_TIMEOUT; + } return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; } From 1fad5b485dd2641f5d4df729a7cd1aac876b97f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 4 May 2022 14:25:18 +0200 Subject: [PATCH 041/615] Add tests --- ...er_full_summary_without_progress.reference | 2 ++ ...tp_header_full_summary_without_progress.sh | 26 +++++++++++++++++++ ...ins_exception_code_with_progress.reference | 1 + ...y_contains_exception_code_with_progress.sh | 24 +++++++++++++++++ 4 files changed, 53 insertions(+) create mode 100644 tests/queries/0_stateless/02293_http_header_full_summary_without_progress.reference create mode 100755 tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh create mode 100644 tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.reference create mode 100755 
tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.sh diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.reference b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.reference new file mode 100644 index 00000000000..538ac795107 --- /dev/null +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.reference @@ -0,0 +1,2 @@ +Read rows in summary is not zero +< HTTP/1.1 408 Request Time-out diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh new file mode 100755 index 00000000000..8f08bd6f84b --- /dev/null +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +CURL_OUTPUT=$(echo 'SELECT 1 + sleepEachRow(0.00002) FROM numbers(100000)' | \ + ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) + +READ_ROWS=$(echo "${CURL_OUTPUT}" | \ + grep 'X-ClickHouse-Summary' | \ + awk '{print $3}' | \ + sed -E 's/.*"read_rows":"?([^,"]*)"?.*/\1/' + ) + +if [ "$READ_ROWS" -ne 0 ]; +then + echo "Read rows in summary is not zero" +else + echo "Read rows in summary is zero!" 
+fi + +# Check that the response code is correct too +echo "${CURL_OUTPUT}" | grep "< HTTP/1.1" diff --git a/tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.reference b/tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.reference new file mode 100644 index 00000000000..487bd5d5bc3 --- /dev/null +++ b/tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.reference @@ -0,0 +1 @@ +Expected exception: 159 diff --git a/tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.sh b/tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.sh new file mode 100755 index 00000000000..fba136e7c38 --- /dev/null +++ b/tests/queries/0_stateless/02293_http_header_summary_contains_exception_code_with_progress.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +CURL_OUTPUT=$(echo 'SELECT 1 + sleepEachRow(0.00002) FROM numbers(100000)' | \ + ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&max_execution_time=1" --data-binary @- 2>&1) + +EXCEPTION=$(echo "${CURL_OUTPUT}" | grep 'X-ClickHouse-Exception-Code') + +if [[ "$EXCEPTION" =~ .*"159".* ]]; +then + echo "Expected exception: 159" +else + echo "Unexpected exception" + echo "EXCEPTION:" + echo "'${EXCEPTION}'" + echo "DATA:" + echo "$CURL_OUTPUT" +fi + From 5190d9e85a8633ae415b077154f24836b44e92b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 4 May 2022 16:06:43 +0200 Subject: [PATCH 042/615] Give exception_code an initialization value --- src/Server/HTTP/WriteBufferFromHTTPServerResponse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 5c4e306bccd..6905d5df8b5 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -124,7 +124,7 @@ private: size_t send_progress_interval_ms = 100; Stopwatch progress_watch; - int exception_code; + int exception_code = 0; std::mutex mutex; /// progress callback could be called from different threads. 
}; From e99d6a3f71b55b6f461b91bcdf65481ba9a7b422 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Wed, 4 May 2022 15:55:05 -0700 Subject: [PATCH 043/615] combine tests to a single file --- ...t_index_from_unidirectional_edge.reference | 2 -- ...et_dest_index_from_unidirectional_edge.sql | 4 ---- ...indexes_from_unidirectional_edge.reference | 2 -- ...3_get_indexes_from_unidirectional_edge.sql | 4 ---- ...n_index_from_unidirectional_edge.reference | 3 --- ..._origin_index_from_unidirectional_edge.sql | 5 ---- ...02292_h3_get_unidirectional_edge.reference | 2 -- .../02292_h3_get_unidirectional_edge.sql | 4 ---- ...get_unidirectional_edge_boundary.reference | 2 -- ...92_h3_get_unidirectional_edge_boundary.sql | 4 ---- ..._get_unidirectional_edges_from_hexagon.sql | 4 ---- .../02292_h3_is_unidirectional_edge.reference | 2 -- .../02292_h3_is_unidirectional_edge.sql | 4 ---- ...> 02292_h3_unidirectional_funcs.reference} | 13 +++++++++++ .../02292_h3_unidirectional_funcs.sql | 23 +++++++++++++++++++ 15 files changed, 36 insertions(+), 42 deletions(-) delete mode 100644 tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference delete mode 100644 tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql delete mode 100644 tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference delete mode 100644 tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql delete mode 100644 tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference delete mode 100644 tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql delete mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference delete mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql delete mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference delete mode 100644 
tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql delete mode 100644 tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql delete mode 100644 tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference delete mode 100644 tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql rename tests/queries/0_stateless/{02292_h3_get_unidirectional_edges_from_hexagon.reference => 02292_h3_unidirectional_funcs.reference} (53%) create mode 100644 tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql diff --git a/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference deleted file mode 100644 index 02c4e6f71f6..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.reference +++ /dev/null @@ -1,2 +0,0 @@ -599686043507097597 -0 diff --git a/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql deleted file mode 100644 index d8dd57508df..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_dest_index_from_unidirectional_edge.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - -SELECT h3GetDestinationIndexFromUnidirectionalEdge(1248204388774707197); -SELECT h3GetDestinationIndexFromUnidirectionalEdge(599686042433355773); diff --git a/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference deleted file mode 100644 index 05e72bbe3ee..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.reference +++ /dev/null @@ -1,2 +0,0 @@ -(599686042433355775,599686043507097599) -(0,0) diff --git a/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql 
b/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql deleted file mode 100644 index 5d784291ef8..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_indexes_from_unidirectional_edge.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - -SELECT h3GetIndexesFromUnidirectionalEdge(1248204388774707199); -SELECT h3GetIndexesFromUnidirectionalEdge(599686042433355775); \ No newline at end of file diff --git a/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference deleted file mode 100644 index 1c42d8408b9..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.reference +++ /dev/null @@ -1,3 +0,0 @@ -599686042433355775 -599686042433355773 -0 diff --git a/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql deleted file mode 100644 index 370ccca2e44..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_origin_index_from_unidirectional_edge.sql +++ /dev/null @@ -1,5 +0,0 @@ --- Tags: no-fasttest - -SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707199); -SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707197); -SELECT h3GetOriginIndexFromUnidirectionalEdge(599686042433355775); diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference deleted file mode 100644 index 5aea166fbd0..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.reference +++ /dev/null @@ -1,2 +0,0 @@ -1248204388774707199 -0 diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql deleted file mode 100644 index ddab17e47e9..00000000000 --- 
a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - -select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283477fffffff')); -select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283473fffffff')); diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference deleted file mode 100644 index 53cf3a9de06..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.reference +++ /dev/null @@ -1,2 +0,0 @@ -[(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044)] -[] diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql b/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql deleted file mode 100644 index 1719286ea77..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_unidirectional_edge_boundary.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - -SELECT h3GetUnidirectionalEdgeBoundary(1248204388774707199); -SELECT h3GetUnidirectionalEdgeBoundary(599686042433355773); \ No newline at end of file diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql b/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql deleted file mode 100644 index 5f71a85fb37..00000000000 --- a/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - -SELECT h3GetUnidirectionalEdgesFromHexagon(1248204388774707199); -SELECT h3GetUnidirectionalEdgesFromHexagon(599686042433355773); \ No newline at end of file diff --git a/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference b/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference deleted file mode 100644 index b261da18d51..00000000000 --- 
a/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -0 diff --git a/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql b/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql deleted file mode 100644 index f64ae292221..00000000000 --- a/tests/queries/0_stateless/02292_h3_is_unidirectional_edge.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Tags: no-fasttest - -SELECT h3UnidirectionalEdgeIsValid(1248204388774707199) as edge; -SELECT h3UnidirectionalEdgeIsValid(1248204388774707197) as edge; diff --git a/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference similarity index 53% rename from tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference rename to tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference index 767c017933e..b0928acc80e 100644 --- a/tests/queries/0_stateless/02292_h3_get_unidirectional_edges_from_hexagon.reference +++ b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference @@ -1,2 +1,15 @@ +599686043507097597 +0 +(599686042433355775,599686043507097599) +(0,0) +599686042433355775 +599686042433355773 +0 +[(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044)] +[] [1248204388774707199,1320261982812635135,1392319576850563071,1464377170888491007,1536434764926418943,1608492358964346879] [1248204388774707197,1320261982812635133,1392319576850563069,1464377170888491005,1536434764926418941,1608492358964346877] +1248204388774707199 +0 +1 +0 diff --git a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql new file mode 100644 index 00000000000..05b391241e0 --- /dev/null +++ b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql @@ -0,0 +1,23 @@ +-- Tags: no-fasttest + +SELECT h3GetDestinationIndexFromUnidirectionalEdge(1248204388774707197); +SELECT 
h3GetDestinationIndexFromUnidirectionalEdge(599686042433355773); + +SELECT h3GetIndexesFromUnidirectionalEdge(1248204388774707199); +SELECT h3GetIndexesFromUnidirectionalEdge(599686042433355775); + +SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707199); +SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707197); +SELECT h3GetOriginIndexFromUnidirectionalEdge(599686042433355775); + +SELECT h3GetUnidirectionalEdgeBoundary(1248204388774707199); +SELECT h3GetUnidirectionalEdgeBoundary(599686042433355773); + +SELECT h3GetUnidirectionalEdgesFromHexagon(1248204388774707199); +SELECT h3GetUnidirectionalEdgesFromHexagon(599686042433355773); + +select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283477fffffff')); +select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283473fffffff')); + +SELECT h3UnidirectionalEdgeIsValid(1248204388774707199) as edge; +SELECT h3UnidirectionalEdgeIsValid(1248204388774707197) as edge; From 966733ef8da4c1d881a03a6a7277be42b08f0afb Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 5 May 2022 23:19:54 +0800 Subject: [PATCH 044/615] rename --- src/Storages/WindowView/StorageWindowView.cpp | 55 ++++++++++--------- src/Storages/WindowView/StorageWindowView.h | 14 ++--- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a329b01e9f2..2a75ac4540e 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -443,9 +443,9 @@ bool StorageWindowView::optimize( const Names & deduplicate_by_columns, ContextPtr local_context) { - auto storage_ptr = getInnerStorage(); + auto storage_ptr = getInnerTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getInnerStorage()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); + return 
getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } std::pair StorageWindowView::getNewBlocks(UInt32 watermark) @@ -455,7 +455,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) InterpreterSelectQuery fetch( getFetchColumnQuery(w_start, watermark), window_view_context, - getInnerStorage(), + getInnerTable(), nullptr, SelectQueryOptions(QueryProcessingStage::FetchColumns)); @@ -490,7 +490,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) auto creator = [&](const StorageID & blocks_id_global) { - auto parent_table_metadata = getParentStorage()->getInMemoryMetadataPtr(); + auto parent_table_metadata = getParentTable()->getInMemoryMetadataPtr(); auto required_columns = parent_table_metadata->getColumns(); required_columns.add(ColumnDescription("____timestamp", std::make_shared())); return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::WithMergeableState); @@ -561,7 +561,7 @@ inline void StorageWindowView::fire(UInt32 watermark) } if (!target_table_id.empty()) { - StoragePtr target_table = getTargetStorage(); + StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); InterpreterInsertQuery interpreter(insert, getContext()); @@ -604,7 +604,7 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( auto t_sample_block = InterpreterSelectQuery( - inner_select_query, window_view_context, getParentStorage(), nullptr, + inner_select_query, window_view_context, getParentTable(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState)) .getSampleBlock(); auto columns_list = std::make_shared(); @@ -1056,8 +1056,8 @@ StorageWindowView::StorageWindowView( InterpreterCreateQuery create_interpreter(inner_create_query, create_context); create_interpreter.setInternal(true); create_interpreter.execute(); - inner_storage = 
DatabaseCatalog::instance().getTable(StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()), getContext()); - inner_table_id = inner_storage->getStorageID(); + inner_table = DatabaseCatalog::instance().getTable(StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()), getContext()); + inner_table_id = inner_table->getStorageID(); } clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds(); @@ -1262,7 +1262,7 @@ void StorageWindowView::writeIntoWindowView( auto creator = [&](const StorageID & blocks_id_global) { - auto parent_metadata = window_view.getParentStorage()->getInMemoryMetadataPtr(); + auto parent_metadata = window_view.getParentTable()->getInMemoryMetadataPtr(); auto required_columns = parent_metadata->getColumns(); required_columns.add(ColumnDescription("____timestamp", std::make_shared())); return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::FetchColumns); @@ -1319,11 +1319,12 @@ void StorageWindowView::writeIntoWindowView( }); } - auto inner_storage = window_view.getInnerStorage(); - auto lock = inner_storage->lockForShare( + auto inner_table = window_view.getInnerTable(); + auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - auto metadata_snapshot = inner_storage->getInMemoryMetadataPtr(); - auto output = inner_storage->write(window_view.getMergeableQuery(), metadata_snapshot, local_context); + auto metadata_snapshot = inner_table->getInMemoryMetadataPtr(); + auto output = inner_table->write(window_view.getMergeableQuery(), metadata_snapshot, local_context); + output->addTableLock(lock); builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) @@ -1356,7 +1357,7 @@ void StorageWindowView::shutdown() auto table_id = getStorageID(); 
DatabaseCatalog::instance().removeDependency(select_table_id, table_id); - inner_storage.reset(); + inner_table.reset(); } void StorageWindowView::checkTableCanBeDropped() const @@ -1399,7 +1400,7 @@ Block & StorageWindowView::getHeader() const if (!sample_block) { sample_block = InterpreterSelectQuery( - select_query->clone(), window_view_context, getParentStorage(), nullptr, + select_query->clone(), window_view_context, getParentTable(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)).getSampleBlock(); /// convert all columns to full columns /// in case some of them are constant @@ -1411,18 +1412,18 @@ Block & StorageWindowView::getHeader() const return sample_block; } -StoragePtr StorageWindowView::getParentStorage() const +StoragePtr StorageWindowView::getParentTable() const { - if (!parent_storage) - parent_storage = DatabaseCatalog::instance().getTable(select_table_id, getContext()); - return parent_storage; + if (!parent_table) + parent_table = DatabaseCatalog::instance().getTable(select_table_id, getContext()); + return parent_table; } -StoragePtr StorageWindowView::getInnerStorage() const +StoragePtr StorageWindowView::getInnerTable() const { - if (!inner_storage) - inner_storage = DatabaseCatalog::instance().getTable(inner_table_id, getContext()); - return inner_storage; + if (!inner_table) + inner_table = DatabaseCatalog::instance().getTable(inner_table_id, getContext()); + return inner_table; } ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const @@ -1470,11 +1471,11 @@ ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) cons return res_query; } -StoragePtr StorageWindowView::getTargetStorage() const +StoragePtr StorageWindowView::getTargetTable() const { - if (!target_storage && !target_table_id.empty()) - target_storage = DatabaseCatalog::instance().getTable(target_table_id, getContext()); - return target_storage; + if (!target_table&& !target_table_id.empty()) + target_table = 
DatabaseCatalog::instance().getTable(target_table_id, getContext()); + return target_table; } void registerStorageWindowView(StorageFactory & factory) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index ef552262378..d936b60c1d1 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -203,9 +203,9 @@ private: StorageID select_table_id = StorageID::createEmpty(); StorageID target_table_id = StorageID::createEmpty(); StorageID inner_table_id = StorageID::createEmpty(); - mutable StoragePtr parent_storage; - mutable StoragePtr inner_storage; - mutable StoragePtr target_storage; + mutable StoragePtr parent_table; + mutable StoragePtr inner_table; + mutable StoragePtr target_table; BackgroundSchedulePool::TaskHolder clean_cache_task; BackgroundSchedulePool::TaskHolder fire_task; @@ -236,11 +236,9 @@ private: ASTPtr getFinalQuery() const { return final_query->clone(); } ASTPtr getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const; - StoragePtr getParentStorage() const; - - StoragePtr getInnerStorage() const; - - StoragePtr getTargetStorage() const; + StoragePtr getParentTable() const; + StoragePtr getInnerTable() const; + StoragePtr getTargetTable() const; Block & getHeader() const; From a7dc1f48e52f86dd64345433fff1fdf82daa806f Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 5 May 2022 23:31:15 +0800 Subject: [PATCH 045/615] populate support for windowview --- src/Interpreters/InterpreterCreateQuery.cpp | 8 ++ src/Parsers/ParserCreateQuery.cpp | 6 ++ src/Storages/WindowView/StorageWindowView.cpp | 94 ++++++++++++++++++- src/Storages/WindowView/StorageWindowView.h | 2 + 4 files changed, 105 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ed996430996..d7d3b1456ab 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ 
-31,6 +31,7 @@ #include #include +#include #include #include @@ -1358,6 +1359,13 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) return InterpreterInsertQuery(insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } + else if (create.select && !create.attach && create.is_window_view && create.is_populate) + { + auto table = DatabaseCatalog::instance().getTable({create.getDatabase(), create.getTable(), create.uuid}, getContext()); + if (auto * window_view = dynamic_cast(table.get())) + return window_view->populate(); + return {}; + } return {}; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 9c9989dc39f..7351e1e29ba 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -826,6 +826,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_window("WINDOW"); + ParserKeyword s_populate("POPULATE"); ParserToken s_dot(TokenType::Dot); ParserToken s_eq(TokenType::Equals); ParserToken s_lparen(TokenType::OpeningRoundBracket); @@ -853,6 +854,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & bool is_watermark_bounded = false; bool allowed_lateness = false; bool if_not_exists = false; + bool is_populate = false; if (!s_create.ignore(pos, expected)) { @@ -925,6 +927,9 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + if (s_populate.ignore(pos, expected)) + is_populate = true; + /// AS SELECT ... 
if (!s_as.ignore(pos, expected)) return false; @@ -957,6 +962,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->watermark_function = watermark; query->allowed_lateness = allowed_lateness; query->lateness_function = lateness; + query->is_populate = is_populate; tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 2a75ac4540e..79d06a57267 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -12,15 +12,18 @@ #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -41,6 +44,7 @@ #include #include #include +#include #include #include @@ -51,6 +55,12 @@ #include +namespace ProfileEvents +{ + extern const Event SelectedBytes; + extern const Event SelectedRows; +} + namespace DB { namespace ErrorCodes @@ -818,11 +828,6 @@ void StorageWindowView::updateMaxTimestamp(UInt32 timestamp) void StorageWindowView::updateMaxWatermark(UInt32 watermark) { std::lock_guard lock(fire_signal_mutex); - if (max_watermark == 0) - { - max_watermark = getWindowUpperBound(watermark - 1); - return; - } bool updated; if (is_watermark_strictly_ascending) @@ -1161,9 +1166,88 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } } +class PushingToWindowViewSink final : public SinkToStorage +{ +public: + PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); + String getName() const override { return "PushingToWindowViewSink"; } + void consume(Chunk chunk) override; + +private: + StorageWindowView & window_view; + StoragePtr storage_holder; + ContextPtr context; +}; + +BlockIO StorageWindowView::populate() +{ + 
if(is_time_column_func_now) + throw Exception( + ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "POPULATE is not supported when using function now() as the time column"); + + auto select_query_ = select_query->clone(); + auto & modified_select = select_query_->as(); + + auto analyzer_res = TreeRewriterResult({}); + removeJoin(modified_select, analyzer_res, getContext()); + + modified_select.setExpression(ASTSelectQuery::Expression::HAVING, {}); + modified_select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + + auto select = std::make_shared(); + select->children.push_back(std::make_shared()); + modified_select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(select)); + + auto order_by = std::make_shared(); + auto order_by_elem = std::make_shared(); + order_by_elem->children.push_back(std::make_shared(timestamp_column_name)); + order_by_elem->direction = 1; + order_by->children.push_back(order_by_elem); + modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_by)); + + QueryPipelineBuilder pipeline; + + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
+ InterpreterSelectQuery interpreter_select{ + select_query_ , getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; + pipeline = interpreter_select.buildQueryPipeline(); + + auto header_block + = InterpreterSelectQuery( + select_query_->clone(), getContext(), getParentTable(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)) + .getSampleBlock(); + + auto sink = std::make_shared(header_block, *this, nullptr, getContext()); + + BlockIO res; + + pipeline.addChain(Chain(std::move(sink))); + pipeline.setMaxThreads(1); + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); + + res.pipeline.addStorageHolder(shared_from_this()); + res.pipeline.addStorageHolder(getInnerTable()); + + return res; +} + void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { + if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) + { + std::lock_guard lock(window_view.fire_signal_mutex); + const auto & window_column = block.getByName(window_view.timestamp_column_name); + const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); + UInt32 first_record_timestamp = window_end_data[0]; + window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); + } + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index d936b60c1d1..dd5c43362d1 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -143,6 +143,8 @@ public: std::pair getNewBlocks(UInt32 watermark); + BlockIO populate(); + static void writeIntoWindowView(StorageWindowView 
& window_view, const Block & block, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } From b213b1e75222101d520127c9dbad13d594271b55 Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 5 May 2022 15:50:41 +0000 Subject: [PATCH 046/615] add test --- ...iew_event_tumble_to_asc_populate.reference | 17 +++++++ ...indow_view_event_tumble_to_asc_populate.sh | 50 +++++++++++++++++++ ...w_event_tumble_asc_join_populate.reference | 3 ++ ...dow_view_event_tumble_asc_join_populate.sh | 40 +++++++++++++++ 4 files changed, 110 insertions(+) create mode 100644 tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference create mode 100755 tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh create mode 100644 tests/queries/0_stateless/01074_window_view_event_tumble_asc_join_populate.reference create mode 100755 tests/queries/0_stateless/01074_window_view_event_tumble_asc_join_populate.sh diff --git a/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference new file mode 100644 index 00000000000..ba7551b2578 --- /dev/null +++ b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference @@ -0,0 +1,17 @@ +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 +------ +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 +1 8 1990-01-01 12:00:35 +2 8 1990-01-01 12:00:40 diff --git a/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh new file mode 100755 index 00000000000..d7ce729ae57 --- /dev/null +++ 
b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < Date: Fri, 6 May 2022 11:08:59 +0800 Subject: [PATCH 047/615] update --- src/Storages/WindowView/StorageWindowView.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 984315d8d73..17482c82c1e 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -476,7 +476,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) InterpreterSelectQuery fetch( getFetchColumnQuery(w_start, watermark), getContext(), - getInnerStorage(), + getInnerTable(), nullptr, SelectQueryOptions(QueryProcessingStage::FetchColumns)); @@ -625,7 +625,7 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( auto t_sample_block = InterpreterSelectQuery( - inner_select_query, getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState)) + inner_select_query, getContext(), getParentTable(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState)) .getSampleBlock(); auto columns_list = std::make_shared(); @@ -1514,7 +1514,7 @@ Block & StorageWindowView::getHeader() const { sample_block = InterpreterSelectQuery( - select_query->clone(), getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)) + select_query->clone(), getContext(), getParentTable(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)) .getSampleBlock(); /// convert all columns to full columns /// in case some of them are constant From 38fec0ed4ddfa682a59f179a28bd4212e291b114 Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 6 May 2022 14:30:31 +0800 Subject: [PATCH 
048/615] update code style --- src/Storages/WindowView/StorageWindowView.cpp | 25 +++++++++---------- src/Storages/WindowView/StorageWindowView.h | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 17482c82c1e..958b4886487 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1210,12 +1210,12 @@ private: BlockIO StorageWindowView::populate() { - if(is_time_column_func_now) + if (is_time_column_func_now) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "POPULATE is not supported when using function now() as the time column"); - auto select_query_ = select_query->clone(); - auto & modified_select = select_query_->as(); + auto modified_query = select_query->clone(); + auto & modified_select = modified_query->as(); auto analyzer_res = TreeRewriterResult({}); removeJoin(modified_select, analyzer_res, getContext()); @@ -1236,10 +1236,9 @@ BlockIO StorageWindowView::populate() QueryPipelineBuilder pipeline; - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - InterpreterSelectQuery interpreter_select{ - select_query_ , getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; - pipeline = interpreter_select.buildQueryPipeline(); + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
+ InterpreterSelectQuery interpreter_select{modified_query, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; + pipeline = interpreter_select.buildQueryPipeline(); auto header_block = InterpreterSelectQuery( @@ -1251,13 +1250,13 @@ BlockIO StorageWindowView::populate() BlockIO res; pipeline.addChain(Chain(std::move(sink))); - pipeline.setMaxThreads(1); - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); + pipeline.setMaxThreads(1); + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); res.pipeline.addStorageHolder(shared_from_this()); res.pipeline.addStorageHolder(getInnerTable()); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 10142f62dd1..9704dd2ae09 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -148,7 +148,7 @@ public: std::pair getNewBlocks(UInt32 watermark); - BlockIO populate(); + BlockIO populate(); static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); From 5a8479446a7d52d7d8d0c665ccba1c03d22f3a93 Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 6 May 2022 14:35:10 +0800 Subject: [PATCH 049/615] simplify code --- src/Storages/WindowView/StorageWindowView.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 958b4886487..85ea9ee214e 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -55,12 +55,6 @@ #include -namespace ProfileEvents -{ - extern 
const Event SelectedBytes; - extern const Event SelectedRows; -} - namespace DB { namespace ErrorCodes @@ -1240,11 +1234,7 @@ BlockIO StorageWindowView::populate() InterpreterSelectQuery interpreter_select{modified_query, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; pipeline = interpreter_select.buildQueryPipeline(); - auto header_block - = InterpreterSelectQuery( - select_query_->clone(), getContext(), getParentTable(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)) - .getSampleBlock(); - + auto header_block = interpreter_select.getSampleBlock(); auto sink = std::make_shared(header_block, *this, nullptr, getContext()); BlockIO res; From a0368559e661077a52fb279426a5b59388b97e4d Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 6 May 2022 14:39:37 +0800 Subject: [PATCH 050/615] remove blank lines --- .../01073_window_view_event_tumble_to_asc_populate.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh index d7ce729ae57..0845be093d5 100755 --- a/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh +++ b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh @@ -37,14 +37,12 @@ INSERT INTO mt VALUES (1, 8, '1990/01/01 12:00:42'); SELECT '------'; EOF - while true; do $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "9" && break || sleep .5 ||: done $CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;" - $CLICKHOUSE_CLIENT --query="DROP TABLE wv" $CLICKHOUSE_CLIENT --query="DROP TABLE mt" $CLICKHOUSE_CLIENT --query="DROP TABLE dst" From 62a7ba3f2635cd3dea4c81b41f8f6d330d2c3cb0 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 6 May 2022 16:48:48 +0000 Subject: [PATCH 051/615] Add columnar JSON formats --- src/Core/Block.cpp | 20 ++ src/Core/Block.h | 3 + src/Core/BlockInfo.cpp | 6 + src/Core/BlockInfo.h | 
1 + src/Core/Settings.h | 2 + src/Formats/EscapingRuleUtils.cpp | 8 +- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 5 + src/Formats/JSONEachRowUtils.h | 37 --- .../{JSONEachRowUtils.cpp => JSONUtils.cpp} | 210 +++++++++++++- src/Formats/JSONUtils.h | 97 +++++++ src/Formats/registerFormats.cpp | 14 + src/IO/ReadHelpers.cpp | 5 +- src/IO/ReadHelpers.h | 4 +- src/Processors/Chunk.cpp | 11 + src/Processors/Chunk.h | 2 + src/Processors/Formats/IRowInputFormat.cpp | 7 +- src/Processors/Formats/IRowOutputFormat.cpp | 6 +- src/Processors/Formats/ISchemaReader.cpp | 12 +- src/Processors/Formats/ISchemaReader.h | 11 + .../Formats/Impl/ArrowBlockInputFormat.cpp | 5 +- .../Impl/JSONAsStringRowInputFormat.cpp | 2 +- .../Impl/JSONColumnsBaseBlockInputFormat.cpp | 265 ++++++++++++++++++ .../Impl/JSONColumnsBaseBlockInputFormat.h | 90 ++++++ .../Impl/JSONColumnsBaseBlockOutputFormat.cpp | 82 ++++++ .../Impl/JSONColumnsBaseBlockOutputFormat.h | 48 ++++ .../Impl/JSONColumnsBlockInputFormat.cpp | 70 +++++ .../Impl/JSONColumnsBlockInputFormat.h | 25 ++ .../Impl/JSONColumnsBlockOutputFormat.cpp | 54 ++++ .../Impl/JSONColumnsBlockOutputFormat.h | 35 +++ ...ONColumnsWithMetadataBlockOutputFormat.cpp | 114 ++++++++ ...JSONColumnsWithMetadataBlockOutputFormat.h | 66 +++++ .../JSONCompactColumnsBlockInputFormat.cpp | 65 +++++ .../Impl/JSONCompactColumnsBlockInputFormat.h | 25 ++ .../JSONCompactColumnsBlockOutputFormat.cpp | 47 ++++ .../JSONCompactColumnsBlockOutputFormat.h | 33 +++ .../Impl/JSONCompactEachRowRowInputFormat.cpp | 2 +- .../Impl/JSONCompactRowOutputFormat.cpp | 53 ++-- .../Formats/Impl/JSONCompactRowOutputFormat.h | 7 +- .../Impl/JSONEachRowRowInputFormat.cpp | 2 +- .../Formats/Impl/JSONRowOutputFormat.cpp | 189 +++---------- .../Formats/Impl/JSONRowOutputFormat.h | 5 - .../Impl/LineAsStringRowInputFormat.cpp | 2 +- .../Formats/Impl/MySQLDumpRowInputFormat.cpp | 4 +- .../Impl/ODBCDriver2BlockOutputFormat.cpp | 14 +- .../Impl/ODBCDriver2BlockOutputFormat.h 
| 3 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 6 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 5 +- .../Formats/Impl/PrettyBlockOutputFormat.cpp | 14 +- .../Formats/Impl/PrettyBlockOutputFormat.h | 3 +- .../Impl/PrettyCompactBlockOutputFormat.cpp | 21 +- .../Impl/PrettyCompactBlockOutputFormat.h | 3 +- .../Impl/PrettySpaceBlockOutputFormat.cpp | 2 +- .../Impl/PrettySpaceBlockOutputFormat.h | 2 +- .../Formats/Impl/TSKVRowOutputFormat.cpp | 6 +- .../Impl/TemplateBlockOutputFormat.cpp | 9 +- .../Formats/Impl/ValuesBlockInputFormat.cpp | 7 +- .../Formats/Impl/XMLRowOutputFormat.cpp | 5 +- 58 files changed, 1503 insertions(+), 349 deletions(-) delete mode 100644 src/Formats/JSONEachRowUtils.h rename src/Formats/{JSONEachRowUtils.cpp => JSONUtils.cpp} (70%) create mode 100644 src/Formats/JSONUtils.h create mode 100644 src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h create mode 100644 src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h diff --git 
a/src/Core/Block.cpp b/src/Core/Block.cpp index f5764262c66..76ded05a84c 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -588,6 +588,15 @@ NamesAndTypesList Block::getNamesAndTypesList() const return res; } +NamesAndTypes Block::getNamesAndTypes() const +{ + NamesAndTypes res; + + for (const auto & elem : data) + res.emplace_back(elem.name, elem.type); + + return res; +} Names Block::getNames() const { @@ -743,6 +752,17 @@ void Block::updateHash(SipHash & hash) const col.column->updateHashWithValue(row_no, hash); } +Serializations Block::getSerializations() const +{ + Serializations res; + res.reserve(data.size()); + + for (const auto & column : data) + res.push_back(column.type->getDefaultSerialization()); + + return res; +} + void convertToFullIfSparse(Block & block) { for (auto & column : block) diff --git a/src/Core/Block.h b/src/Core/Block.h index 8089dffd1dc..8564d9bad65 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -89,11 +89,14 @@ public: const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const; NamesAndTypesList getNamesAndTypesList() const; + NamesAndTypes getNamesAndTypes() const; Names getNames() const; DataTypes getDataTypes() const; Names getDataTypeNames() const; std::unordered_map getNamesToIndexesMap() const; + Serializations getSerializations() const; + /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. 
size_t rows() const; diff --git a/src/Core/BlockInfo.cpp b/src/Core/BlockInfo.cpp index e9aee871be1..ae32e8aa579 100644 --- a/src/Core/BlockInfo.cpp +++ b/src/Core/BlockInfo.cpp @@ -65,6 +65,12 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) mask[row_idx] = true; } +void BlockMissingValues::setBits(size_t column_idx, size_t rows) +{ + RowsBitMask & mask = rows_mask_by_column_id[column_idx]; + mask.resize(rows, true); +} + const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const { static RowsBitMask none; diff --git a/src/Core/BlockInfo.h b/src/Core/BlockInfo.h index 4e06edc65be..649bc9cd2d7 100644 --- a/src/Core/BlockInfo.h +++ b/src/Core/BlockInfo.h @@ -57,6 +57,7 @@ public: /// Check that we have to replace default value at least in one of columns bool hasDefaultBits(size_t column_idx) const; void setBit(size_t column_idx, size_t row_idx); + void setBits(size_t column_idx, size_t rows); bool empty() const { return rows_mask_by_column_id.empty(); } size_t size() const { return rows_mask_by_column_id.size(); } void clear() { rows_mask_by_column_id.clear(); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index be73465eea0..958eba86a77 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -730,6 +730,8 @@ class IColumn; \ M(String, input_format_mysql_dump_table_name, "", "Name of the table in MySQL dump from which to read data", 0) \ M(Bool, input_format_mysql_dump_map_column_names, true, "Match columns from table in MySQL dump and columns from ClickHouse table by names", 0) \ + \ + M(UInt64, output_format_json_columns_max_rows_to_buffer, 10000, "Max rows to buffer in JSONColumnsMonoBlock, JSONCompactColumnsMonoBlock and JSONColumnsWithMetadata format", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 1875caf1855..146043456bd 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -83,7 +83,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca readEscapedString(tmp, buf); break; case FormatSettings::EscapingRule::Quoted: - readQuotedFieldIntoString(tmp, buf); + readQuotedField(tmp, buf); break; case FormatSettings::EscapingRule::CSV: readCSVString(tmp, buf, format_settings.csv); @@ -219,13 +219,13 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin if constexpr (read_string) readQuotedString(result, buf); else - readQuotedFieldIntoString(result, buf); + readQuotedField(result, buf); break; case FormatSettings::EscapingRule::JSON: if constexpr (read_string) readJSONString(result, buf); else - readJSONFieldIntoString(result, buf); + readJSONField(result, buf); break; case FormatSettings::EscapingRule::Raw: readString(result, buf); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 96b52cd2423..d70200aac4a 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -152,6 +152,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; + format_settings.json_columns.max_rows_to_buffer = settings.output_format_json_columns_max_rows_to_buffer; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 
4f77fe099e1..13556244410 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -268,6 +268,11 @@ struct FormatSettings String table_name; bool map_column_names = true; } mysql_dump; + + struct + { + size_t max_rows_to_buffer = 10000; + } json_columns; }; } diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h deleted file mode 100644 index 46c343f356a..00000000000 --- a/src/Formats/JSONEachRowUtils.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); - - -/// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. -/// JSON array with different nested types is treated as Tuple. -/// If cannot convert (for example when field contains null), return nullptr. -DataTypePtr getDataTypeFromJSONField(const String & field); - -/// Read row in JSONEachRow format and try to determine type for each field. -/// Return list of names and types. -/// If cannot determine the type of some field, return nullptr for it. -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); - -/// Read row in JSONCompactEachRow format and try to determine type for each field. -/// If cannot determine the type of some field, return nullptr for it. 
-DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); - -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); - -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings); - -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); - -} diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONUtils.cpp similarity index 70% rename from src/Formats/JSONEachRowUtils.cpp rename to src/Formats/JSONUtils.cpp index 534237c900c..eb9a78ad734 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -1,7 +1,8 @@ #include -#include +#include #include #include +#include #include #include #include @@ -212,7 +213,7 @@ DataTypePtr getDataTypeFromJSONField(const String & field) auto [parser, element] = getJSONParserAndElement(); bool parsed = parser.parse(field, element); if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object"); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); return getDataTypeFromJSONFieldImpl(element); } @@ -224,7 +225,7 @@ static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in auto [parser, element] = getJSONParserAndElement(); bool parsed = parser.parse(line, element); if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object"); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); auto fields = extractor.extract(element); @@ -384,4 +385,207 @@ DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTyp return nullptr; } +void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines) +{ + writeChar(',', out); + writeChar('\n', new_lines, out); +} + +void 
writeJSONFieldCompactDelimiter(WriteBuffer & out) +{ + writeCString(", ", out); +} + +template +void writeJSONTitle(const char * title, WriteBuffer & out, size_t indent) +{ + writeChar('\t', indent, out); + writeChar('"', out); + writeCString(title, out); + if constexpr (with_space) + writeCString("\": ", out); + else + writeCString("\":\n", out); +} + +void writeJSONObjectStart(WriteBuffer & out, size_t indent, const char * title) +{ + if (title) + writeJSONTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("{\n", out); +} + +void writeJSONObjectEnd(WriteBuffer & out, size_t indent) +{ + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar('}', out); +} + +void writeJSONArrayStart(WriteBuffer & out, size_t indent, const char * title) +{ + if (title) + writeJSONTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("[\n", out); +} + +void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent, const char * title) +{ + if (title) + writeJSONTitle(title, out, indent); + else + writeChar('\t', indent, out); + writeCString("[", out); +} + +void writeJSONArrayEnd(WriteBuffer & out, size_t indent) +{ + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar(']', out); +} + +void writeJSONCompactArrayEnd(WriteBuffer & out) +{ + writeChar(']', out); +} + +void writeJSONFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name, + size_t indent) +{ + if (name.has_value()) + writeJSONTitle(name->data(), out, indent); + + if (yield_strings) + { + WriteBufferFromOwnString buf; + + serialization.serializeText(column, row_num, buf, settings); + writeJSONString(buf.str(), out, settings); + } + else + serialization.serializeTextJSON(column, row_num, out, settings); +} + +void writeJSONColumns( + const Columns & columns, + const NamesAndTypes & fields, + const 
Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent) +{ + for (size_t i = 0; i < columns.size(); ++i) + { + if (i != 0) + writeJSONFieldDelimiter(out); + writeJSONFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out, fields[i].name, indent); + } +} + +void writeJSONCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out) +{ + for (size_t i = 0; i < columns.size(); ++i) + { + if (i != 0) + writeJSONFieldCompactDelimiter(out); + writeJSONFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out); + } +} + +void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out) +{ + writeJSONArrayStart(out, 1, "meta"); + + for (size_t i = 0; i < fields.size(); ++i) + { + writeJSONObjectStart(out, 2); + + writeJSONTitle("name", out, 3); + writeDoubleQuoted(fields[i].name, out); + writeJSONFieldDelimiter(out); + writeJSONTitle("type", out, 3); + writeJSONString(fields[i].type->getName(), out, settings); + writeJSONObjectEnd(out, 2); + + if (i + 1 < fields.size()) + writeJSONFieldDelimiter(out); + } + + writeJSONArrayEnd(out, 1); +} + +void writeJSONAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out) +{ + writeJSONFieldDelimiter(out, 2); + writeJSONTitle("rows", out, 1); + writeIntText(rows, out); + + if (applied_limit) + { + writeJSONFieldDelimiter(out, 2); + writeJSONTitle("rows_before_limit_at_least", out, 1); + writeIntText(rows_before_limit, out); + } + + if (write_statistics) + { + writeJSONFieldDelimiter(out, 2); + writeJSONObjectStart(out, 1, "statistics"); + + writeJSONTitle("elapsed", out, 2); + 
writeText(watch.elapsedSeconds(), out); + writeJSONFieldDelimiter(out); + + writeJSONTitle("rows_read", out, 2); + writeText(progress.read_rows.load(), out); + writeJSONFieldDelimiter(out); + + writeJSONTitle("bytes_read", out, 2); + writeText(progress.read_bytes.load(), out); + + writeJSONObjectEnd(out, 1); + } +} + +void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8) +{ + for (auto & field : fields) + { + if (!field.type->textCanContainOnlyValidUTF8()) + need_validate_utf8 = true; + + WriteBufferFromOwnString buf; + { + WriteBufferValidUTF8 validating_buf(buf); + writeJSONString(field.name, validating_buf, settings); + } + field.name = buf.str().substr(1, buf.str().size() - 2); + } +} + } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h new file mode 100644 index 00000000000..b4b34498311 --- /dev/null +++ b/src/Formats/JSONUtils.h @@ -0,0 +1,97 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); +std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); + + +/// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. +/// JSON array with different nested types is treated as Tuple. +/// If cannot convert (for example when field contains null), return nullptr. +DataTypePtr getDataTypeFromJSONField(const String & field); + +/// Read row in JSONEachRow format and try to determine type for each field. +/// Return list of names and types. +/// If cannot determine the type of some field, return nullptr for it. 
+NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); + +/// Read row in JSONCompactEachRow format and try to determine type for each field. +/// If cannot determine the type of some field, return nullptr for it. +DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); + +bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); + +bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings); + +DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); + +void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8); + + +/// Functions helpers for writing JSON data to WriteBuffer. + +void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines = 1); + +void writeJSONFieldCompactDelimiter(WriteBuffer & out); + +void writeJSONObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + +void writeJSONObjectEnd(WriteBuffer & out, size_t indent = 0); + +void writeJSONArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + +void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + +void writeJSONArrayEnd(WriteBuffer & out, size_t indent = 0); + +void writeJSONCompactArrayEnd(WriteBuffer & out); + +void writeJSONFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name = std::nullopt, + size_t indent = 0); + +void writeJSONColumns(const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + 
const FormatSettings & settings, + WriteBuffer & out, + size_t indent = 0); + +void writeJSONCompactColumns(const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out); + +void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out); + +void writeJSONAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out); +} diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 6797b967baa..9d0bf663715 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -60,6 +60,10 @@ void registerInputFormatCustomSeparated(FormatFactory & factory); void registerOutputFormatCustomSeparated(FormatFactory & factory); void registerInputFormatCapnProto(FormatFactory & factory); void registerOutputFormatCapnProto(FormatFactory & factory); +void registerInputFormatJSONColumns(FormatFactory & factory); +void registerOutputFormatJSONColumns(FormatFactory & factory); +void registerInputFormatJSONCompactColumns(FormatFactory & factory); +void registerOutputFormatJSONCompactColumns(FormatFactory & factory); /// Output only (presentational) formats. @@ -77,6 +81,7 @@ void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); void registerOutputFormatPrometheus(FormatFactory & factory); +void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory); /// Input only formats. 
@@ -119,6 +124,8 @@ void registerTSKVSchemaReader(FormatFactory & factory); void registerValuesSchemaReader(FormatFactory & factory); void registerTemplateSchemaReader(FormatFactory & factory); void registerMySQLSchemaReader(FormatFactory & factory); +void registerJSONColumnsSchemaReader(FormatFactory & factory); +void registerJSONCompactColumnsSchemaReader(FormatFactory & factory); void registerFileExtensions(FormatFactory & factory); @@ -176,6 +183,10 @@ void registerFormats() registerOutputFormatAvro(factory); registerInputFormatArrow(factory); registerOutputFormatArrow(factory); + registerInputFormatJSONColumns(factory); + registerOutputFormatJSONColumns(factory); + registerInputFormatJSONCompactColumns(factory); + registerOutputFormatJSONCompactColumns(factory); registerOutputFormatPretty(factory); registerOutputFormatPrettyCompact(factory); @@ -192,6 +203,7 @@ void registerFormats() registerOutputFormatPostgreSQLWire(factory); registerOutputFormatCapnProto(factory); registerOutputFormatPrometheus(factory); + registerOutputFormatJSONColumnsWithMetadata(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); @@ -232,6 +244,8 @@ void registerFormats() registerValuesSchemaReader(factory); registerTemplateSchemaReader(factory); registerMySQLSchemaReader(factory); + registerJSONColumnsSchemaReader(factory); + registerJSONCompactColumnsSchemaReader(factory); } } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 85c5ff2ec48..1bf8c32b6af 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1363,7 +1363,7 @@ static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf) } } -void readQuotedFieldIntoString(String & s, ReadBuffer & buf) +void readQuotedField(String & s, ReadBuffer & buf) { s.clear(); @@ -1427,8 +1427,9 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) } } -void readJSONFieldIntoString(String & s, ReadBuffer & buf) +void readJSONField(String & s, ReadBuffer & buf) { + 
s.clear(); auto parse_func = [](ReadBuffer & in) { skipJSONField(in, "json_field"); }; readParsedValueIntoString(s, buf, parse_func); } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index c5ffa52c9b3..496b8000441 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1425,8 +1425,8 @@ struct PcgDeserializer } }; -void readQuotedFieldIntoString(String & s, ReadBuffer & buf); +void readQuotedField(String & s, ReadBuffer & buf); -void readJSONFieldIntoString(String & s, ReadBuffer & buf); +void readJSONField(String & s, ReadBuffer & buf); } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 8c7c09abf01..7f46a4140de 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -169,6 +169,17 @@ std::string Chunk::dumpStructure() const return out.str(); } +void Chunk::append(const Chunk & chunk, size_t length) +{ + MutableColumns mutation = mutateColumns(); + for (size_t position = 0; position < mutation.size(); ++position) + { + auto column = chunk.getColumns()[position]; + mutation[position]->insertRangeFrom(*column, 0, std::min(length, column->size())); + } + size_t rows = mutation[0]->size(); + setColumns(std::move(mutation), rows); +} void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx) { diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1c9240ba114..ddf3971fb43 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -101,6 +101,8 @@ public: std::string dumpStructure() const; + void append(const Chunk & chunk, size_t length); + private: Columns columns; UInt64 num_rows = 0; diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 481f77c1ef8..dbe28147d8f 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -48,13 +48,8 @@ bool isParseError(int code) } IRowInputFormat::IRowInputFormat(Block header, ReadBuffer & in_, Params params_) - : IInputFormat(std::move(header), 
in_), params(params_) + : IInputFormat(std::move(header), in_), serializations(getPort().getHeader().getSerializations()), params(params_) { - const auto & port_header = getPort().getHeader(); - size_t num_columns = port_header.columns(); - serializations.resize(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = port_header.getByPosition(i).type->getDefaultSerialization(); } diff --git a/src/Processors/Formats/IRowOutputFormat.cpp b/src/Processors/Formats/IRowOutputFormat.cpp index b48c4a2b3e6..f2f6b49ed3f 100644 --- a/src/Processors/Formats/IRowOutputFormat.cpp +++ b/src/Processors/Formats/IRowOutputFormat.cpp @@ -12,13 +12,11 @@ namespace ErrorCodes IRowOutputFormat::IRowOutputFormat(const Block & header, WriteBuffer & out_, const Params & params_) : IOutputFormat(header, out_) + , num_columns(header.columns()) , types(header.getDataTypes()) + , serializations(header.getSerializations()) , params(params_) { - num_columns = types.size(); - serializations.reserve(num_columns); - for (const auto & type : types) - serializations.push_back(type->getDefaultSerialization()); } void IRowOutputFormat::consume(DB::Chunk chunk) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index f23f33c482d..0ccc498baca 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes extern const int EMPTY_DATA_PASSED; } -static void chooseResultType( +void chooseResultColumnType( DataTypePtr & type, const DataTypePtr & new_type, CommonDataTypeChecker common_type_checker, @@ -48,7 +48,7 @@ static void chooseResultType( } } -static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t max_rows_to_read) +void checkResultColumnTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t 
max_rows_to_read) { if (!type) { @@ -111,7 +111,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (!new_data_types[i]) continue; - chooseResultType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), row); + chooseResultColumnType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), row); } } @@ -136,7 +136,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() for (size_t i = 0; i != data_types.size(); ++i) { /// Check that we could determine the type of this column. - checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read); + checkResultColumnTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read); } return result; @@ -189,7 +189,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() } auto & type = it->second; - chooseResultType(type, new_type, common_type_checker, default_type, name, row); + chooseResultColumnType(type, new_type, common_type_checker, default_type, name, row); } } @@ -202,7 +202,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { auto & type = names_to_types[name]; /// Check that we could determine the type of this column. 
- checkTypeAndAppend(result, type, name, default_type, max_rows_to_read); + checkResultColumnTypeAndAppend(result, type, name, default_type, max_rows_to_read); } return result; diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index a8eff762856..f3803bc98de 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -109,4 +109,15 @@ public: virtual ~IExternalSchemaReader() = default; }; +void chooseResultColumnType( + DataTypePtr & type, + const DataTypePtr & new_type, + CommonDataTypeChecker common_type_checker, + const DataTypePtr & default_type, + const String & column_name, + size_t row); + +void checkResultColumnTypeAndAppend( + NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t max_rows_to_read); + } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 07331d82bb8..da3e3efe807 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -76,9 +76,8 @@ Chunk ArrowBlockInputFormat::generate() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
if (format_settings.defaults_for_omitted_fields)
-        for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx)
-            for (const auto & column_idx : missing_columns)
-                block_missing_values.setBit(column_idx, row_idx);
+        for (const auto & column_idx : missing_columns)
+            block_missing_values.setBits(column_idx, res.getNumRows());
 
     return res;
 }
diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp
index e31006ff0f6..7630d7bb699 100644
--- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp
@@ -1,5 +1,5 @@
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp
new file mode 100644
index 00000000000..ae3b0962dc8
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp
@@ -0,0 +1,278 @@
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int INCORRECT_DATA;
+    extern const int EMPTY_DATA_PASSED;
+}
+
+
+JSONColumnsBaseReader::JSONColumnsBaseReader(ReadBuffer & in_) : in(&in_)
+{
+}
+
+bool JSONColumnsBaseReader::checkColumnEnd()
+{
+    skipWhitespaceIfAny(*in);
+    if (!in->eof() && *in->position() == ']')
+    {
+        ++in->position();
+        skipWhitespaceIfAny(*in);
+        return true;
+    }
+    return false;
+}
+
+bool JSONColumnsBaseReader::checkColumnEndOrSkipFieldDelimiter()
+{
+    if (checkColumnEnd())
+        return true;
+    skipWhitespaceIfAny(*in);
+    assertChar(',', *in);
+    skipWhitespaceIfAny(*in);
+    return false;
+}
+
+bool JSONColumnsBaseReader::checkChunkEndOrSkipColumnDelimiter()
+{
+    if (checkChunkEnd())
+        return true;
+    skipWhitespaceIfAny(*in);
+    assertChar(',', *in);
+    skipWhitespaceIfAny(*in);
+    return false;
+}
+
+void JSONColumnsBaseReader::skipColumn()
+{
+    /// We assume that we already read '[', so we should skip until matched ']'.
+    /// Brackets inside string literals are not counted, and inside a string literal
+    /// a backslash escapes the next character, so \" does not terminate the string.
+    size_t balance = 1;
+    bool inside_quotes = false;
+    char * pos;
+    while (!in->eof() && balance)
+    {
+        if (inside_quotes)
+            pos = find_first_symbols<'\\', '"'>(in->position(), in->buffer().end());
+        else
+            pos = find_first_symbols<'[', ']', '"'>(in->position(), in->buffer().end());
+
+        in->position() = pos;
+        /// Nothing found in the rest of the current buffer, re-check eof() and search again.
+        if (in->position() == in->buffer().end())
+            continue;
+
+        if (*in->position() == '\\')
+        {
+            /// Skip the backslash together with the escaped character. eof() is checked
+            /// first because the escaped character may not be available right after the backslash.
+            ++in->position();
+            if (!in->eof())
+                ++in->position();
+            continue;
+        }
+
+        if (*in->position() == '"')
+            inside_quotes = !inside_quotes;
+        else if (*in->position() == ']')
+            --balance;
+        else if (*in->position() == '[')
+            ++balance;
+        ++in->position();
+    }
+}
+
+JSONColumnsBaseBlockInputFormat::JSONColumnsBaseBlockInputFormat(
+    ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_)
+    : IInputFormat(header_, in_)
+    , format_settings(format_settings_)
+    , fields(header_.getNamesAndTypes())
+    , name_to_index(header_.getNamesToIndexesMap())
+    , serializations(header_.getSerializations())
+    , reader(std::move(reader_))
+{
+}
+
+size_t JSONColumnsBaseBlockInputFormat::readColumn(
+    IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name)
+{
+    /// Check for empty column.
+    if (reader->checkColumnEnd())
+        return 0;
+
+    do
+    {
+        readFieldImpl(*in, column, type, serialization, column_name, format_settings, false);
+    }
+    while (!reader->checkColumnEndOrSkipFieldDelimiter());
+
+    return column.size();
+}
+
+Chunk JSONColumnsBaseBlockInputFormat::generate()
+{
+    MutableColumns columns = getPort().getHeader().cloneEmptyColumns();
+    block_missing_values.clear();
+
+    if (in->eof())
+        return {};
+
+    reader->readChunkStart();
+    /// Check for empty block.
+ if (reader->checkChunkEnd()) + return Chunk(std::move(columns), 0); + + std::vector seen_columns(columns.size(), 0); + Int64 rows = -1; + size_t iteration = 0; + do + { + auto column_name = reader->readColumnStart(); + size_t column_index = iteration; + if (column_name.has_value()) + { + /// Check if this name appears in header. If no, skip this column or throw + /// an exception according to setting input_format_skip_unknown_fields + if (!name_to_index.contains(*column_name)) + { + if (!format_settings.skip_unknown_fields) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column found in input data: {}", *column_name); + + reader->skipColumn(); + continue; + } + column_index = name_to_index[*column_name]; + } + + if (column_index >= columns.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Input data has too many columns, expected {} columns", columns.size()); + + seen_columns[column_index] = 1; + size_t columns_size = readColumn(*columns[column_index], fields[column_index].type, serializations[column_index], fields[column_index].name); + if (rows != -1 && size_t(rows) != columns_size) + throw Exception(ErrorCodes::INCORRECT_DATA, "Number of rows differs in different columns: {} != {}", rows, columns_size); + rows = columns_size; + ++iteration; + } + while (!reader->checkChunkEndOrSkipColumnDelimiter()); + + if (rows <= 0) + return Chunk(std::move(columns), 0); + + /// Insert defaults in columns that were not presented in current block and fill + ///block_missing_values accordingly if setting input_format_defaults_for_omitted_fields is enabled + for (size_t i = 0; i != seen_columns.size(); ++i) + { + if (!seen_columns[i]) + { + columns[i]->insertManyDefaults(rows); + if (format_settings.defaults_for_omitted_fields) + block_missing_values.setBits(i, rows); + } + } + + return Chunk(std::move(columns), rows); +} + +JSONColumnsBaseSchemaReader::JSONColumnsBaseSchemaReader( + ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr 
reader_) + : ISchemaReader(in_), format_settings(format_settings_), reader(std::move(reader_)) +{ +} + +void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) +{ + auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second) + { + return getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers); + }; + chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row); +} + +NamesAndTypesList JSONColumnsBaseSchemaReader::readSchema() +{ + size_t total_rows_read = 0; + std::unordered_map names_to_types; + std::vector names_order; + /// Read data block by block and determine the type for each column + /// until max_rows_to_read_for_schema_inference is reached. + while (total_rows_read < format_settings.max_rows_to_read_for_schema_inference) + { + if (in.eof()) + break; + + reader->readChunkStart(); + /// Check for empty block. + if (reader->checkChunkEnd()) + continue; + + size_t iteration = 0; + size_t rows_in_block = 0; + do + { + auto column_name_opt = reader->readColumnStart(); + /// If format doesn't have named for columns, use default names 'c1', 'c2', ... + String column_name = column_name_opt.has_value() ? *column_name_opt : "c" + std::to_string(iteration + 1); + /// Keep order of column names as it is in input data. 
+ if (!names_to_types.contains(column_name)) + names_order.push_back(column_name); + + rows_in_block = 0; + auto column_type = readColumnAndGetDataType(column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); + chooseResulType(names_to_types[column_name], column_type, column_name, total_rows_read + 1); + ++iteration; + } + while (!reader->checkChunkEndOrSkipColumnDelimiter()); + + total_rows_read += rows_in_block; + } + + if (names_to_types.empty()) + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data"); + + NamesAndTypesList result; + for (auto & name : names_order) + { + auto & type = names_to_types[name]; + /// Check that we could determine the type of this column. + checkResultColumnTypeAndAppend(result, type, name, nullptr, format_settings.max_rows_to_read_for_schema_inference); + } + + return result; +} + +DataTypePtr JSONColumnsBaseSchemaReader::readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read) +{ + /// Check for empty column. + if (reader->checkColumnEnd()) + return nullptr; + + String field; + DataTypePtr column_type; + do + { + /// If we reached max_rows_to_read, skip the rest part of this column. 
+ if (rows_read == max_rows_to_read) + { + reader->skipColumn(); + break; + } + + readJSONField(field, in); + DataTypePtr field_type = getDataTypeFromJSONField(field); + chooseResulType(column_type, field_type, column_name, rows_read); + ++rows_read; + } + while (!reader->checkColumnEndOrSkipFieldDelimiter()); + + return column_type; +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h new file mode 100644 index 00000000000..8676b5c4ad3 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ReadBuffer; + + +/// Base class for reading data in Columnar JSON formats. +class JSONColumnsBaseReader +{ +public: + JSONColumnsBaseReader(ReadBuffer & in_); + + virtual ~JSONColumnsBaseReader() = default; + + void setReadBuffer(ReadBuffer & in_) { in = &in_; } + + virtual void readChunkStart() = 0; + virtual std::optional readColumnStart() = 0; + + virtual bool checkChunkEnd() = 0; + bool checkChunkEndOrSkipColumnDelimiter(); + + bool checkColumnEnd(); + bool checkColumnEndOrSkipFieldDelimiter(); + + void skipColumn(); + +protected: + ReadBuffer * in; +}; + + +/// Base class for Columnar JSON input formats. It works with data using +/// JSONColumnsBaseReader interface. +/// To implement new Columnar JSON format you need to implement new JSONColumnsBaseReader +/// interface and provide it to JSONColumnsBaseBlockInputFormat. 
+class JSONColumnsBaseBlockInputFormat : public IInputFormat +{ +public: + JSONColumnsBaseBlockInputFormat(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_); + + String getName() const override { return "JSONColumnsBaseBlockInputFormat"; } + + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } + +protected: + Chunk generate() override; + + size_t readColumn(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name); + + const FormatSettings format_settings; + const NamesAndTypes fields; + /// Maps column names and their positions in header. + std::unordered_map name_to_index; + Serializations serializations; + std::unique_ptr reader; + BlockMissingValues block_missing_values; +}; + + +/// Base class for schema inference from Columnar JSON input formats. It works with data using +/// JSONColumnsBaseReader interface. +/// To implement schema reader for the new Columnar JSON format you need to implement new JSONColumnsBaseReader +/// interface and provide it to JSONColumnsBaseSchemaReader. +class JSONColumnsBaseSchemaReader : public ISchemaReader +{ +public: + JSONColumnsBaseSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_); + +private: + NamesAndTypesList readSchema() override; + + /// Read whole column in the block (up to max_rows_to_read rows) and extract the data type. + DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read); + + /// Choose result type for column from two inferred types from different rows. 
+    void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row);
+
+    const FormatSettings format_settings;
+    std::unique_ptr reader;
+};
+
+}
diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp
new file mode 100644
index 00000000000..c998b95067c
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp
@@ -0,0 +1,92 @@
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+JSONColumnsBaseBlockOutputFormat::JSONColumnsBaseBlockOutputFormat(
+    WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_)
+    : IOutputFormat(header_, out_)
+    , format_settings(format_settings_)
+    , mono_block(mono_block_)
+    , serializations(header_.getSerializations())
+    , ostr(&out)
+    , max_rows_in_mono_block(format_settings_.json_columns.max_rows_to_buffer)
+{
+}
+
+/// Without mono_block every chunk is written immediately. With mono_block rows are
+/// accumulated in mono_chunk (which is written in writeSuffix()); once
+/// max_rows_in_mono_block rows are buffered, further rows are dropped.
+void JSONColumnsBaseBlockOutputFormat::consume(Chunk chunk)
+{
+    if (!mono_block)
+    {
+        writeChunk(chunk);
+        return;
+    }
+
+    if (!mono_chunk)
+    {
+        mono_chunk = std::move(chunk);
+        total_rows_in_mono_block = mono_chunk.getNumRows();
+        return;
+    }
+
+    /// The first chunk is stored as is, so the buffer may already hold
+    /// max_rows_in_mono_block rows or more. The rest of the data is dropped then.
+    if (total_rows_in_mono_block >= max_rows_in_mono_block)
+        return;
+
+    /// Copy up to (max_rows_in_mono_block - total_rows_in_mono_block) rows.
+    /// The subtraction cannot wrap around because of the check above.
+    const size_t rows_left = max_rows_in_mono_block - total_rows_in_mono_block;
+    size_t length = chunk.getNumRows();
+    if (length > rows_left)
+        length = rows_left;
+    mono_chunk.append(chunk, length);
+    total_rows_in_mono_block += length;
+}
+
+void JSONColumnsBaseBlockOutputFormat::writeSuffix()
+{
+    if (mono_chunk)
+    {
+        writeChunk(mono_chunk);
+        mono_chunk.clear();
+    }
+}
+
+void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk)
+{
+    writeChunkStart();
+    const auto & columns = chunk.getColumns();
+    for (size_t i = 0; i != columns.size(); ++i)
+    {
+        writeColumnStart(i);
+        writeColumn(*columns[i], *serializations[i]);
+        writeColumnEnd(i == columns.size() - 1);
+    }
+    writeChunkEnd();
+}
+
+void JSONColumnsBaseBlockOutputFormat::writeColumnEnd(bool is_last)
+{
+    writeJSONCompactArrayEnd(*ostr);
+    if (!is_last)
+        writeJSONFieldDelimiter(*ostr);
+}
+
+void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const ISerialization & serialization)
+{
+    for (size_t i = 0; i != column.size(); ++i)
+    {
+        if (i != 0)
+            writeJSONFieldCompactDelimiter(*ostr);
+        serialization.serializeTextJSON(column, i, *ostr, format_settings);
+    }
+}
+
+}
diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h
new file mode 100644
index 00000000000..2c680c9e4cd
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class WriteBuffer;
+
+/// Base class for Columnar JSON output formats.
+/// It outputs data block by block. If mono_block_ argument is true,
+/// it will buffer up to output_format_json_columns_max_rows_to_buffer rows
+/// and outputs them as a single block in writeSuffix() method.
+class JSONColumnsBaseBlockOutputFormat : public IOutputFormat +{ +public: + JSONColumnsBaseBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_); + + String getName() const override { return "JSONColumnsBaseBlockOutputFormat"; } + +protected: + void consume(Chunk chunk) override; + void writeSuffix() override; + + void writeChunk(Chunk & chunk); + void writeColumn(const IColumn & column, const ISerialization & serialization); + + virtual void writeChunkStart() = 0; + virtual void writeChunkEnd() = 0; + virtual void writeColumnStart(size_t /*column_index*/) = 0; + void writeColumnEnd(bool is_last); + + const FormatSettings format_settings; + bool mono_block; + Serializations serializations; + + WriteBuffer * ostr; + + /// For mono_block == true only + Chunk mono_chunk; + size_t max_rows_in_mono_block; + size_t total_rows_in_mono_block = 0; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp new file mode 100644 index 00000000000..2f924be2c14 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp @@ -0,0 +1,70 @@ +#include +#include +#include + +namespace DB +{ + +JSONColumnsReader::JSONColumnsReader(ReadBuffer & in_) : JSONColumnsBaseReader(in_) +{ +} + +void JSONColumnsReader::readChunkStart() +{ + skipWhitespaceIfAny(*in); + assertChar('{', *in); + skipWhitespaceIfAny(*in); +} + +std::optional JSONColumnsReader::readColumnStart() +{ + skipWhitespaceIfAny(*in); + String name; + readJSONString(name, *in); + skipWhitespaceIfAny(*in); + assertChar(':', *in); + skipWhitespaceIfAny(*in); + assertChar('[', *in); + skipWhitespaceIfAny(*in); + return name; +} + +bool JSONColumnsReader::checkChunkEnd() +{ + skipWhitespaceIfAny(*in); + if (!in->eof() && *in->position() == '}') + { + ++in->position(); + skipWhitespaceIfAny(*in); + return true; + } + return false; +} + + +void 
registerInputFormatJSONColumns(FormatFactory & factory) +{ + factory.registerInputFormat( + "JSONColumns", + [](ReadBuffer & buf, + const Block &sample, + const RowInputFormatParams &, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, settings, std::make_unique(buf)); + } + ); +} + +void registerJSONColumnsSchemaReader(FormatFactory & factory) +{ + factory.registerSchemaReader( + "JSONColumns", + [](ReadBuffer & buf, const FormatSettings & settings) + { + return std::make_shared(buf, settings, std::make_unique(buf)); + } + ); +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h new file mode 100644 index 00000000000..382da947c03 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/* Format JSONColumns reads each block of data in the next format: + * { + * "name1": [value1, value2, value3, ...], + * "name2": [value1, value2m value3, ...], + * ... 
+ * } + */ +class JSONColumnsReader : public JSONColumnsBaseReader +{ +public: + JSONColumnsReader(ReadBuffer & in_); + + void readChunkStart() override; + std::optional readColumnStart() override; + bool checkChunkEnd() override; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp new file mode 100644 index 00000000000..e8b1b303ebd --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, size_t indent_) + : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_, mono_block_), fields(header_.getNamesAndTypes()), indent(indent_) +{ + for (auto & field : fields) + { + WriteBufferFromOwnString buf; + writeJSONString(field.name, buf, format_settings); + field.name = buf.str().substr(1, buf.str().size() - 2); + } +} + +void JSONColumnsBlockOutputFormat::writeChunkStart() +{ + writeJSONObjectStart(*ostr, indent); +} + +void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index) +{ + writeJSONCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data()); +} + +void JSONColumnsBlockOutputFormat::writeChunkEnd() +{ + writeJSONObjectEnd(*ostr, indent); + writeChar('\n', *ostr); +} + +void registerOutputFormatJSONColumns(FormatFactory & factory) +{ + for (const auto & [name, mono_block] : {std::make_pair("JSONColumns", false), std::make_pair("JSONColumnsMonoBlock", true)}) + { + factory.registerOutputFormat(name, [mono_block = mono_block]( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings, mono_block); + }); + } + + 
factory.markOutputFormatSupportsParallelFormatting("JSONColumns"); +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h new file mode 100644 index 00000000000..d10735b6227 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h @@ -0,0 +1,35 @@ +#pragma once + +#include + +namespace DB +{ + +/* Format JSONColumns outputs each block of data in the next format: + * { + * "name1": [value1, value2, value3, ...], + * "name2": [value1, value2m value3, ...], + * ... + * } + * There is also JSONColumnsMonoBlock format that buffers up to output_format_json_columns_max_rows_to_buffer rows + * and outputs them as a single block. + */ +class JSONColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat +{ +public: + /// no_escapes - do not use ANSI escape sequences - to display in the browser, not in the console. + JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, size_t indent_ = 0); + + String getName() const override { return "JSONColumnsBlockOutputFormat"; } + +protected: + void writeChunkStart() override; + void writeChunkEnd() override; + + void writeColumnStart(size_t column_index) override; + + NamesAndTypes fields; + size_t indent; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp new file mode 100644 index 00000000000..eed21e8f9c0 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : 
JSONColumnsBlockOutputFormat(out_, header_, format_settings_, true, 1) +{ + bool need_validate_utf8 = false; + makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); + + if (need_validate_utf8) + { + validating_ostr = std::make_unique(out); + ostr = validating_ostr.get(); + } +} + +void JSONColumnsWithMetadataBlockOutputFormat::writePrefix() +{ + writeJSONObjectStart(*ostr); + writeJSONMetadata(fields, format_settings, *ostr); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart() +{ + writeJSONFieldDelimiter(*ostr, 2); + writeJSONObjectStart(*ostr, 1, "data"); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeChunkEnd() +{ + writeJSONObjectEnd(*ostr, indent); +} + +void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + if (num_rows != 2) + throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); + + const auto & columns = chunk.getColumns(); + writeJSONFieldDelimiter(*ostr, 2); + writeJSONObjectStart(*ostr, 1, "extremes"); + writeExtremesElement("min", columns, 0); + writeJSONFieldDelimiter(*ostr); + writeExtremesElement("max", columns, 1); + writeJSONObjectEnd(*ostr, 1); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) +{ + writeJSONObjectStart(*ostr, 2, title); + writeJSONColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3); + writeJSONObjectEnd(*ostr, 2); +} + +void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + if (num_rows != 1) + throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); + + const auto & columns = chunk.getColumns(); + writeJSONFieldDelimiter(*ostr, 2); + writeJSONObjectStart(*ostr, 1, "totals"); + writeJSONColumns(columns, fields, serializations, 0, false, 
format_settings, *ostr, 2); + writeJSONObjectEnd(*ostr, 1); +} + +void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() +{ + auto outside_statistics = getOutsideStatistics(); + if (outside_statistics) + statistics = std::move(*outside_statistics); + + writeJSONAdditionalInfo( + total_rows_in_mono_block, + statistics.rows_before_limit, + statistics.applied_limit, + statistics.watch, + statistics.progress, + format_settings.write_statistics, + *ostr); + + writeJSONObjectEnd(*ostr); + writeChar('\n', *ostr); + ostr->next(); +} + +void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory) +{ + factory.registerOutputFormat("JSONColumnsWithMetadata", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings); + }); + + factory.markFormatHasNoAppendSupport("JSONColumnsWithMetadata"); +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h new file mode 100644 index 00000000000..92be568504e --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h @@ -0,0 +1,66 @@ +#pragma once +#include + +namespace DB +{ + +/* Format JSONColumnsWithMetadata buffers up to output_format_json_columns_max_rows_to_buffer rows + * and outputs them in the next format: + * { + * "meta": + * [ + * { + * "name": "name1", + * "type": "type1" + * }, + * { + * "name": "name2", + * "type": "type2" + * }, + * ... + * ], + * + * "data": + * { + * "name1": [value1, value2, value3, ...], + * "name2": [value1, value2, value3, ...], + * ... + * }, + * + * "rows": ..., + * + * "statistics": + * { + * "elapsed": ..., + * "rows_read": ..., + * "bytes_read": ... 
+ * } + * } + */ +class JSONColumnsWithMetadataBlockOutputFormat : public JSONColumnsBlockOutputFormat +{ +public: + JSONColumnsWithMetadataBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "JSONColumnsWithMetadataBlockOutputFormat"; } + + void setRowsBeforeLimit(size_t rows_before_limit_) override { statistics.rows_before_limit = rows_before_limit_; statistics.applied_limit = true; } + void onProgress(const Progress & progress_) override { statistics.progress.incrementPiecewiseAtomically(progress_); } + +protected: + void consumeTotals(Chunk chunk) override; + void consumeExtremes(Chunk chunk) override; + + void writePrefix() override; + void finalizeImpl() override; + + void writeChunkStart() override; + void writeChunkEnd() override; + + void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); /// Writes one named entry ("min" or "max") of the "extremes" object. + + Statistics statistics; + std::unique_ptr validating_ostr; /// Validates UTF-8 sequences, replaces bad sequences with replacement character. 
+}; + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp new file mode 100644 index 00000000000..15db9979e45 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp @@ -0,0 +1,65 @@ +#include +#include +#include + +namespace DB +{ + +JSONCompactColumnsReader::JSONCompactColumnsReader(ReadBuffer & in_) : JSONColumnsBaseReader(in_) +{ +} + +void JSONCompactColumnsReader::readChunkStart() +{ + skipWhitespaceIfAny(*in); + assertChar('[', *in); + skipWhitespaceIfAny(*in); +} + +std::optional JSONCompactColumnsReader::readColumnStart() +{ + skipWhitespaceIfAny(*in); + assertChar('[', *in); + skipWhitespaceIfAny(*in); + return std::nullopt; +} + +bool JSONCompactColumnsReader::checkChunkEnd() +{ + skipWhitespaceIfAny(*in); + if (!in->eof() && *in->position() == ']') + { + ++in->position(); + skipWhitespaceIfAny(*in); + return true; + } + return false; +} + + +void registerInputFormatJSONCompactColumns(FormatFactory & factory) +{ + factory.registerInputFormat( + "JSONCompactColumns", + [](ReadBuffer & buf, + const Block &sample, + const RowInputFormatParams &, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, settings, std::make_unique(buf)); + } + ); +} + +void registerJSONCompactColumnsSchemaReader(FormatFactory & factory) +{ + factory.registerSchemaReader( + "JSONCompactColumns", + [](ReadBuffer & buf, const FormatSettings & settings) + { + return std::make_shared(buf, settings, std::make_unique(buf)); + } + ); +} + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h new file mode 100644 index 00000000000..ab3824a3f9b --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/* Format JSONCompactColumns reads each block of data 
in the next format: + * [ + * [value1, value2, value3, ...], + * [value1, value2, value3, ...], + * ... + * ] + */ +class JSONCompactColumnsReader : public JSONColumnsBaseReader +{ +public: + explicit JSONCompactColumnsReader(ReadBuffer & in_); + + void readChunkStart() override; + std::optional readColumnStart() override; + bool checkChunkEnd() override; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp new file mode 100644 index 00000000000..18bd33c3fb2 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +namespace DB +{ + +JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_) + : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_, mono_block_), column_names(header_.getNames()) +{ +} + +void JSONCompactColumnsBlockOutputFormat::writeChunkStart() +{ + writeJSONArrayStart(*ostr); +} + +void JSONCompactColumnsBlockOutputFormat::writeColumnStart(size_t) +{ + writeJSONCompactArrayStart(*ostr, 1); +} + +void JSONCompactColumnsBlockOutputFormat::writeChunkEnd() +{ + writeJSONArrayEnd(*ostr); + writeChar('\n', *ostr); +} + +void registerOutputFormatJSONCompactColumns(FormatFactory & factory) +{ + for (const auto & [name, mono_block] : {std::make_pair("JSONCompactColumns", false), std::make_pair("JSONCompactColumnsMonoBlock", true)}) + { + factory.registerOutputFormat(name, [mono_block = mono_block]( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings, mono_block); + }); + } + + factory.markOutputFormatSupportsParallelFormatting("JSONCompactColumns"); +} + +} diff --git 
a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h new file mode 100644 index 00000000000..5051e7d93cc --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h @@ -0,0 +1,33 @@ +#pragma once +#include + +namespace DB +{ + +/* Format JSONCompactColumns outputs each block of data in the next format: + * [ + * [value1, value2, value3, ...], + * [value1, value2, value3, ...], + * ... + * ] + * There is also JSONCompactColumnsMonoBlock format that buffers up to output_format_json_columns_max_rows_to_buffer rows + * and outputs them as a single block. + */ +class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat +{ +public: + /// mono_block_: true for the JSONCompactColumnsMonoBlock variant, which buffers rows and outputs them as a single block. + JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_); + + String getName() const override { return "JSONCompactColumnsBlockOutputFormat"; } + +protected: + void writeChunkStart() override; + void writeChunkEnd() override; + + void writeColumnStart(size_t column_index) override; + + Names column_names; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 867b56c541b..140f3fb41b3 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index b31c04b4554..fbb4a8d9116 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -1,5 +1,6 @@ 
#include #include +#include #include @@ -20,72 +21,50 @@ JSONCompactRowOutputFormat::JSONCompactRowOutputFormat( void JSONCompactRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - } - else - serialization.serializeTextJSON(column, row_num, *ostr, settings); - + writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); ++field_number; } void JSONCompactRowOutputFormat::writeFieldDelimiter() { - writeCString(", ", *ostr); + writeJSONFieldCompactDelimiter(*ostr); } -void JSONCompactRowOutputFormat::writeTotalsFieldDelimiter() -{ - writeCString(",", *ostr); -} - - void JSONCompactRowOutputFormat::writeRowStartDelimiter() { - writeCString("\t\t[", *ostr); + writeJSONCompactArrayStart(*ostr, 2); } void JSONCompactRowOutputFormat::writeRowEndDelimiter() { - writeChar(']', *ostr); + writeJSONCompactArrayEnd(*ostr); field_number = 0; ++row_count; } void JSONCompactRowOutputFormat::writeBeforeTotals() { - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"totals\": [", *ostr); + writeJSONFieldDelimiter(*ostr, 2); + writeJSONCompactArrayStart(*ostr, 1, "totals"); +} + +void JSONCompactRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) +{ + writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); } void JSONCompactRowOutputFormat::writeAfterTotals() { - writeChar(']', *ostr); + writeJSONCompactArrayEnd(*ostr); } void JSONCompactRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeCString("\t\t\"", *ostr); - writeCString(title, *ostr); - writeCString("\": [", *ostr); - - size_t extremes_columns = columns.size(); - for (size_t i = 0; i < extremes_columns; ++i) - { - if (i != 0) - 
writeTotalsFieldDelimiter(); - - writeField(*columns[i], *serializations[i], row_num); - } - - writeChar(']', *ostr); + writeJSONCompactArrayStart(*ostr, 2, title); + writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + writeJSONCompactArrayEnd(*ostr); } void registerOutputFormatJSONCompact(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h index a0e9a2a6026..d17a6acf019 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h @@ -36,12 +36,7 @@ private: void writeExtremesElement(const char * title, const Columns & columns, size_t row_num) override; - void writeTotalsField(const IColumn & column, const ISerialization & serialization, size_t row_num) override - { - return writeField(column, serialization, row_num); - } - - void writeTotalsFieldDelimiter() override; + void writeTotals(const Columns & columns, size_t row_num) override; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 4fb7a40ebfc..e9d6b516feb 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 61ac25ca441..86ccd2ddb12 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -15,23 +16,9 @@ JSONRowOutputFormat::JSONRowOutputFormat( bool yield_strings_) : IRowOutputFormat(header, out_, params_), settings(settings_), yield_strings(yield_strings_) { - const auto & sample = 
getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); - bool need_validate_utf8 = false; - for (size_t i = 0; i < sample.columns(); ++i) - { - if (!sample.getByPosition(i).type->textCanContainOnlyValidUTF8()) - need_validate_utf8 = true; - - WriteBufferFromOwnString buf; - { - WriteBufferValidUTF8 validating_buf(buf); - writeJSONString(fields[i].name, validating_buf, settings); - } - fields[i].name = buf.str(); - } + fields = header.getNamesAndTypes(); + makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8); if (need_validate_utf8) { @@ -45,88 +32,34 @@ JSONRowOutputFormat::JSONRowOutputFormat( void JSONRowOutputFormat::writePrefix() { - writeCString("{\n", *ostr); - writeCString("\t\"meta\":\n", *ostr); - writeCString("\t[\n", *ostr); - - for (size_t i = 0; i < fields.size(); ++i) - { - writeCString("\t\t{\n", *ostr); - - writeCString("\t\t\t\"name\": ", *ostr); - writeString(fields[i].name, *ostr); - writeCString(",\n", *ostr); - writeCString("\t\t\t\"type\": ", *ostr); - writeJSONString(fields[i].type->getName(), *ostr, settings); - writeChar('\n', *ostr); - - writeCString("\t\t}", *ostr); - if (i + 1 < fields.size()) - writeChar(',', *ostr); - writeChar('\n', *ostr); - } - - writeCString("\t],\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"data\":\n", *ostr); - writeCString("\t[\n", *ostr); + writeJSONObjectStart(*ostr); + writeJSONMetadata(fields, settings, *ostr); + writeJSONFieldDelimiter(*ostr, 2); + writeJSONArrayStart(*ostr, 1, "data"); } void JSONRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - writeCString("\t\t\t", *ostr); - writeString(fields[field_number].name, *ostr); - writeCString(": ", *ostr); - - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - } - 
else - serialization.serializeTextJSON(column, row_num, *ostr, settings); - - ++field_number; -} - -void JSONRowOutputFormat::writeTotalsField(const IColumn & column, const ISerialization & serialization, size_t row_num) -{ - writeCString("\t\t", *ostr); - writeString(fields[field_number].name, *ostr); - writeCString(": ", *ostr); - - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - } - else - serialization.serializeTextJSON(column, row_num, *ostr, settings); - + writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3); ++field_number; } void JSONRowOutputFormat::writeFieldDelimiter() { - writeCString(",\n", *ostr); + writeJSONFieldDelimiter(*ostr); /// Must go through ostr: it may be the UTF-8 validating wrapper around out. } void JSONRowOutputFormat::writeRowStartDelimiter() { - writeCString("\t\t{\n", *ostr); + writeJSONObjectStart(*ostr, 2); } void JSONRowOutputFormat::writeRowEndDelimiter() { - writeChar('\n', *ostr); - writeCString("\t\t}", *ostr); + writeJSONObjectEnd(*ostr, 2); field_number = 0; ++row_count; } @@ -134,71 +67,42 @@ void JSONRowOutputFormat::writeRowEndDelimiter() void JSONRowOutputFormat::writeRowBetweenDelimiter() { - writeCString(",\n", *ostr); + writeJSONFieldDelimiter(*ostr); /// Same as above: never write to out directly. } void JSONRowOutputFormat::writeSuffix() { - writeChar('\n', *ostr); - writeCString("\t]", *ostr); + writeJSONArrayEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeTotals() { - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"totals\":\n", *ostr); - writeCString("\t{\n", *ostr); + writeJSONFieldDelimiter(*ostr, 2); + writeJSONObjectStart(*ostr, 1, "totals"); } void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - size_t columns_size = columns.size(); - - for (size_t i = 0; i < columns_size; ++i) - { - if (i != 0) - writeTotalsFieldDelimiter(); - - writeTotalsField(*columns[i], *serializations[i], row_num); - 
} + writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2); } void JSONRowOutputFormat::writeAfterTotals() { - writeChar('\n', *ostr); - writeCString("\t}", *ostr); - field_number = 0; + writeJSONObjectEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeExtremes() { - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"extremes\":\n", *ostr); - writeCString("\t{\n", *ostr); + writeJSONFieldDelimiter(*ostr, 2); + writeJSONObjectStart(*ostr, 1, "extremes"); } void JSONRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeCString("\t\t\"", *ostr); - writeCString(title, *ostr); - writeCString("\":\n", *ostr); - writeCString("\t\t{\n", *ostr); - - size_t extremes_columns = columns.size(); - for (size_t i = 0; i < extremes_columns; ++i) - { - if (i != 0) - writeFieldDelimiter(); - - writeField(*columns[i], *serializations[i], row_num); - } - - writeChar('\n', *ostr); - writeCString("\t\t}", *ostr); - field_number = 0; + writeJSONObjectStart(*ostr, 2, title); + writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3); + writeJSONObjectEnd(*ostr, 2); } void JSONRowOutputFormat::writeMinExtreme(const Columns & columns, size_t row_num) @@ -213,58 +117,29 @@ void JSONRowOutputFormat::writeMaxExtreme(const Columns & columns, size_t row_nu void JSONRowOutputFormat::writeAfterExtremes() { - writeChar('\n', *ostr); - writeCString("\t}", *ostr); + writeJSONObjectEnd(*ostr, 1); } void JSONRowOutputFormat::finalizeImpl() { - writeCString(",\n\n", *ostr); - writeCString("\t\"rows\": ", *ostr); - writeIntText(row_count, *ostr); - auto outside_statistics = getOutsideStatistics(); if (outside_statistics) statistics = std::move(*outside_statistics); - writeRowsBeforeLimitAtLeast(); - - if (settings.write_statistics) - writeStatistics(); + writeJSONAdditionalInfo( + row_count, + statistics.rows_before_limit, + statistics.applied_limit, + 
statistics.watch, + statistics.progress, + settings.write_statistics, + *ostr); + writeJSONObjectEnd(*ostr); writeChar('\n', *ostr); - writeCString("}\n", *ostr); ostr->next(); } -void JSONRowOutputFormat::writeRowsBeforeLimitAtLeast() -{ - if (statistics.applied_limit) - { - writeCString(",\n\n", *ostr); - writeCString("\t\"rows_before_limit_at_least\": ", *ostr); - writeIntText(statistics.rows_before_limit, *ostr); - } -} - -void JSONRowOutputFormat::writeStatistics() -{ - writeCString(",\n\n", *ostr); - writeCString("\t\"statistics\":\n", *ostr); - writeCString("\t{\n", *ostr); - - writeCString("\t\t\"elapsed\": ", *ostr); - writeText(statistics.watch.elapsedSeconds(), *ostr); - writeCString(",\n", *ostr); - writeCString("\t\t\"rows_read\": ", *ostr); - writeText(statistics.progress.read_rows.load(), *ostr); - writeCString(",\n", *ostr); - writeCString("\t\t\"bytes_read\": ", *ostr); - writeText(statistics.progress.read_bytes.load(), *ostr); - writeChar('\n', *ostr); - - writeCString("\t}", *ostr); -} void JSONRowOutputFormat::onProgress(const Progress & value) { diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index 8561f5b4870..3459cc1b7a6 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -63,12 +63,7 @@ protected: void finalizeImpl() override; - virtual void writeTotalsField(const IColumn & column, const ISerialization & serialization, size_t row_num); virtual void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); - virtual void writeTotalsFieldDelimiter() { writeFieldDelimiter(); } - - void writeRowsBeforeLimitAtLeast(); - void writeStatistics(); void onRowsReadBeforeUpdate() override { row_count = getRowsReadBefore(); } diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index dc346b4f5f5..30084804d92 100644 
--- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 5f3f015a5b1..7768339b064 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -398,7 +398,7 @@ bool MySQLDumpRowInputFormat::readField(IColumn & column, size_t column_idx) void MySQLDumpRowInputFormat::skipField() { String tmp; - readQuotedFieldIntoString(tmp, *in); + readQuotedField(tmp, *in); } MySQLDumpSchemaReader::MySQLDumpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) @@ -434,7 +434,7 @@ DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() if (!data_types.empty()) skipFieldDelimiter(in); - readQuotedFieldIntoString(value, in); + readQuotedField(value, in); auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index a82285c1c19..7cf133e5739 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -13,7 +13,7 @@ namespace DB { ODBCDriver2BlockOutputFormat::ODBCDriver2BlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings(format_settings_) + : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()) { } @@ -23,7 +23,7 @@ static void writeODBCString(WriteBuffer & out, const std::string & str) out.write(str.data(), str.size()); } -void 
ODBCDriver2BlockOutputFormat::writeRow(const Serializations & serializations, const Columns & columns, size_t row_idx, std::string & buffer) +void ODBCDriver2BlockOutputFormat::writeRow(const Columns & columns, size_t row_idx, std::string & buffer) { size_t num_columns = columns.size(); for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) @@ -46,20 +46,14 @@ void ODBCDriver2BlockOutputFormat::writeRow(const Serializations & serialization } } -void ODBCDriver2BlockOutputFormat::write(Chunk chunk, PortKind port_kind) +void ODBCDriver2BlockOutputFormat::write(Chunk chunk, PortKind) { String text_value; - const auto & header = getPort(port_kind).getHeader(); const auto & columns = chunk.getColumns(); - size_t num_columns = columns.size(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getDefaultSerialization(); - const size_t rows = chunk.getNumRows(); for (size_t i = 0; i < rows; ++i) - writeRow(serializations, columns, i, text_value); + writeRow(columns, i, text_value); } void ODBCDriver2BlockOutputFormat::consume(Chunk chunk) diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h index de6ea22dfd7..9a0a43aa5bb 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h @@ -35,8 +35,9 @@ private: void writePrefix() override; const FormatSettings format_settings; + Serializations serializations; - void writeRow(const Serializations & serializations, const Columns & columns, size_t row_idx, std::string & buffer); + void writeRow(const Columns & columns, size_t row_idx, std::string & buffer); void write(Chunk chunk, PortKind port_kind); }; diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 1531c0d2794..87351b6c5d9 100644 --- 
a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -65,10 +65,8 @@ Chunk ORCBlockInputFormat::generate() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. if (format_settings.defaults_for_omitted_fields) - for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) - for (const auto & column_idx : missing_columns) - block_missing_values.setBit(column_idx, row_idx); - + for (const auto & column_idx : missing_columns) + block_missing_values.setBits(column_idx, res.getNumRows()); return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 86987c665e0..062f161b7f9 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -66,9 +66,8 @@ Chunk ParquetBlockInputFormat::generate() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
if (format_settings.defaults_for_omitted_fields) - for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) - for (const auto & column_idx : missing_columns) - block_missing_values.setBit(column_idx, row_idx); + for (const auto & column_idx : missing_columns) + block_missing_values.setBits(column_idx, res.getNumRows()); return res; } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index ad65a5f707d..8fbf0a14916 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings(format_settings_) + : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()) { struct winsize w; if (0 == ioctl(STDOUT_FILENO, TIOCGWINSZ, &w)) @@ -143,7 +143,7 @@ GridSymbols ascii_grid_symbols { } -void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) +void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { UInt64 max_rows = format_settings.pretty.max_rows; @@ -158,10 +158,6 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) const auto & columns = chunk.getColumns(); const auto & header = getPort(port_kind).getHeader(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getSerialization(*columns[i]->getSerializationInfo()); - WidthsPerColumn widths; Widths max_widths; Widths name_widths; @@ -371,21 +367,21 @@ void PrettyBlockOutputFormat::writeValueWithPadding( void PrettyBlockOutputFormat::consume(Chunk chunk) { - write(chunk, PortKind::Main); + write(std::move(chunk), PortKind::Main); } void 
PrettyBlockOutputFormat::consumeTotals(Chunk chunk) { total_rows = 0; writeCString("\nTotals:\n", out); - write(chunk, PortKind::Totals); + write(std::move(chunk), PortKind::Totals); } void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk) { total_rows = 0; writeCString("\nExtremes:\n", out); - write(chunk, PortKind::Extremes); + write(std::move(chunk), PortKind::Extremes); } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 091010f9131..cfdd2213515 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -33,11 +33,12 @@ protected: size_t row_number_width = 7; // "10000. " const FormatSettings format_settings; + Serializations serializations; using Widths = PODArray; using WidthsPerColumn = std::vector; - virtual void write(const Chunk & chunk, PortKind port_kind); + virtual void write(Chunk chunk, PortKind port_kind); void writeSuffix() override; void onRowsReadBeforeUpdate() override { total_rows = getRowsReadBefore(); } diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index be8751cde13..b760de71e4d 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -149,7 +149,6 @@ void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths) void PrettyCompactBlockOutputFormat::writeRow( size_t row_num, const Block & header, - const Serializations & serializations, const Columns & columns, const WidthsPerColumn & widths, const Widths & max_widths) @@ -187,7 +186,7 @@ void PrettyCompactBlockOutputFormat::writeRow( writeCString("\n", out); } -void PrettyCompactBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) +void PrettyCompactBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { UInt64 max_rows = 
format_settings.pretty.max_rows; @@ -202,18 +201,11 @@ void PrettyCompactBlockOutputFormat::write(const Chunk & chunk, PortKind port_ki { if (!mono_chunk) { - mono_chunk = chunk.clone(); + mono_chunk = std::move(chunk); return; } - MutableColumns mutation = mono_chunk.mutateColumns(); - for (size_t position = 0; position < mutation.size(); ++position) - { - auto column = chunk.getColumns()[position]; - mutation[position]->insertRangeFrom(*column, 0, column->size()); - } - size_t rows = mutation[0]->size(); - mono_chunk.setColumns(std::move(mutation), rows); + mono_chunk.append(chunk, chunk.getNumRows()); return; } else @@ -241,13 +233,8 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeHeader(header, max_widths, name_widths); - size_t num_columns = header.columns(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getDefaultSerialization(); - for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i) - writeRow(i, header, serializations, columns, widths, max_widths); + writeRow(i, header, columns, widths, max_widths); writeBottom(max_widths); diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h index a52ffe3d70a..5c39328051c 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h @@ -17,13 +17,12 @@ public: String getName() const override { return "PrettyCompactBlockOutputFormat"; } private: - void write(const Chunk & chunk, PortKind port_kind) override; + void write(Chunk chunk, PortKind port_kind) override; void writeHeader(const Block & block, const Widths & max_widths, const Widths & name_widths); void writeBottom(const Widths & max_widths); void writeRow( size_t row_num, const Block & header, - const Serializations & serializations, const Columns & columns, 
const WidthsPerColumn & widths, const Widths & max_widths); diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 85b27a6fb57..36e2aabf7f8 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -9,7 +9,7 @@ namespace DB { -void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) +void PrettySpaceBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { UInt64 max_rows = format_settings.pretty.max_rows; diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h index b3090497783..6a8cb4e799c 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h @@ -17,7 +17,7 @@ public: String getName() const override { return "PrettySpaceBlockOutputFormat"; } private: - void write(const Chunk & chunk, PortKind port_kind) override; + void write(Chunk chunk, PortKind port_kind) override; void writeSuffix() override; }; diff --git a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp index 14dec8420a8..0e29d74b419 100644 --- a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp @@ -8,12 +8,8 @@ namespace DB { TSKVRowOutputFormat::TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : TabSeparatedRowOutputFormat(out_, header, false, false, false, params_, format_settings_) + : TabSeparatedRowOutputFormat(out_, header, false, false, false, params_, format_settings_), fields(header.getNamesAndTypes()) { - const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - 
fields.assign(columns.begin(), columns.end()); - for (auto & field : fields) { WriteBufferFromOwnString wb; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 5c5b99f61da..0e7bdb259ac 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -3,7 +3,6 @@ #include #include #include -#include namespace DB @@ -17,15 +16,9 @@ namespace ErrorCodes TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_, ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_, std::string row_between_delimiter_) - : IOutputFormat(header_, out_), settings(settings_), format(std::move(format_)) + : IOutputFormat(header_, out_), settings(settings_), serializations(header_.getSerializations()), format(std::move(format_)) , row_format(std::move(row_format_)), row_between_delimiter(std::move(row_between_delimiter_)) { - const auto & sample = getPort(PortKind::Main).getHeader(); - size_t columns = sample.columns(); - serializations.resize(columns); - for (size_t i = 0; i < columns; ++i) - serializations[i] = sample.safeGetByPosition(i).type->getDefaultSerialization(); - /// Validate format string for whole output size_t data_idx = format.format_idx_to_column_idx.size() + 1; for (size_t i = 0; i < format.format_idx_to_column_idx.size(); ++i) diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 727b7fb0a1f..41f77f8bbf2 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -49,11 +49,8 @@ ValuesBlockInputFormat::ValuesBlockInputFormat( params(params_), format_settings(format_settings_), num_columns(header_.columns()), parser_type_for_column(num_columns, ParserType::Streaming), 
attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns), - rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes()) + rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes()), serializations(header_.getSerializations()) { - serializations.resize(types.size()); - for (size_t i = 0; i < types.size(); ++i) - serializations[i] = types[i]->getDefaultSerialization(); } Chunk ValuesBlockInputFormat::generate() @@ -599,7 +596,7 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() skipWhitespaceIfAny(buf); } - readQuotedFieldIntoString(value, buf); + readQuotedField(value, buf); auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index cc2b37189f9..d5fb29874d1 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -8,11 +8,9 @@ namespace DB { XMLRowOutputFormat::XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), format_settings(format_settings_) + : IRowOutputFormat(header_, out_, params_), fields(header_.getNamesAndTypes()), format_settings(format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); field_tag_names.resize(sample.columns()); bool need_validate_utf8 = false; @@ -200,7 +198,6 @@ void XMLRowOutputFormat::onProgress(const Progress & value) void XMLRowOutputFormat::finalizeImpl() { - writeCString("\t", *ostr); writeIntText(row_count, *ostr); writeCString("\n", *ostr); From 3a13c3e372442d2f80fb4143a1ff4d3ee79aa6f2 
Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 6 May 2022 16:50:34 +0000 Subject: [PATCH 052/615] Fix comments --- src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h index d10735b6227..c7ac009654a 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h @@ -17,7 +17,6 @@ namespace DB class JSONColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat { public: - /// no_escapes - do not use ANSI escape sequences - to display in the browser, not in the console. JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, size_t indent_ = 0); String getName() const override { return "JSONColumnsBlockOutputFormat"; } From cd8600c583321c5015f1a5090777e8230fb7f3fa Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 6 May 2022 16:51:12 +0000 Subject: [PATCH 053/615] Add tests --- .../02293_formats_json_columns.reference | 223 ++++++++++++++++++ .../0_stateless/02293_formats_json_columns.sh | 118 +++++++++ 2 files changed, 341 insertions(+) create mode 100644 tests/queries/0_stateless/02293_formats_json_columns.reference create mode 100755 tests/queries/0_stateless/02293_formats_json_columns.sh diff --git a/tests/queries/0_stateless/02293_formats_json_columns.reference b/tests/queries/0_stateless/02293_formats_json_columns.reference new file mode 100644 index 00000000000..6ceda30c840 --- /dev/null +++ b/tests/queries/0_stateless/02293_formats_json_columns.reference @@ -0,0 +1,223 @@ +JSONColumns +{ + "a": [0, 1], + "b": ["String", "String"], + "c": [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]]] +} +{ + "a": [2, 3], + "b": ["String", "String"], + "c": [[[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]]] +} 
+{ + "a": [4], + "b": ["String"], + "c": [[[[0],"String"],[[],"gnirtS"]]] +} +a Nullable(Float64) +b Nullable(String) +c Array(Tuple(Array(Nullable(Float64)), Nullable(String))) +0 String [([],'String'),([],'gnirtS')] +1 String [([0],'String'),([0],'gnirtS')] +2 String [([0,1],'String'),([0,1],'gnirtS')] +3 String [([],'String'),([0,1,2],'gnirtS')] +4 String [([0],'String'),([],'gnirtS')] +JSONColumnsMonoBlock +{ + "a": [0, 1, 2, 3, 4], + "b": ["String", "String", "String", "String", "String"], + "c": [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]], [[[0],"String"],[[],"gnirtS"]]] +} +a Nullable(Float64) +b Nullable(String) +c Array(Tuple(Array(Nullable(Float64)), Nullable(String))) +0 String [([],'String'),([],'gnirtS')] +1 String [([0],'String'),([0],'gnirtS')] +2 String [([0,1],'String'),([0,1],'gnirtS')] +3 String [([],'String'),([0,1,2],'gnirtS')] +4 String [([0],'String'),([],'gnirtS')] +JSONColumnsMonoBlock 3 rows +{ + "a": [0, 1, 2], + "b": ["String", "String", "String"], + "c": [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]]] +} +a Nullable(Float64) +b Nullable(String) +c Array(Tuple(Array(Nullable(Float64)), Nullable(String))) +0 String [([],'String'),([],'gnirtS')] +1 String [([0],'String'),([0],'gnirtS')] +2 String [([0,1],'String'),([0,1],'gnirtS')] +JSONCompactColumns +[ + [0, 1], + ["String", "String"], + [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]]] +] +[ + [2, 3], + ["String", "String"], + [[[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]]] +] +[ + [4], + ["String"], + [[[[0],"String"],[[],"gnirtS"]]] +] +c1 Nullable(Float64) +c2 Nullable(String) +c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String))) +0 String [([],'String'),([],'gnirtS')] +1 String [([0],'String'),([0],'gnirtS')] +2 String [([0,1],'String'),([0,1],'gnirtS')] +3 String 
[([],'String'),([0,1,2],'gnirtS')] +4 String [([0],'String'),([],'gnirtS')] +JSONCompactColumnsMonoBlock +[ + [0, 1, 2, 3, 4], + ["String", "String", "String", "String", "String"], + [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]], [[[0],"String"],[[],"gnirtS"]]] +] +c1 Nullable(Float64) +c2 Nullable(String) +c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String))) +0 String [([],'String'),([],'gnirtS')] +1 String [([0],'String'),([0],'gnirtS')] +2 String [([0,1],'String'),([0,1],'gnirtS')] +3 String [([],'String'),([0,1,2],'gnirtS')] +4 String [([0],'String'),([],'gnirtS')] +JSONCompactColumnsMonoBlock 3 rows +[ + [0, 1, 2], + ["String", "String", "String"], + [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]]] +] +c1 Nullable(Float64) +c2 Nullable(String) +c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String))) +0 String [([],'String'),([],'gnirtS')] +1 String [([0],'String'),([0],'gnirtS')] +2 String [([0,1],'String'),([0,1],'gnirtS')] +JSONColumnsWithMetadata +{ + "meta": + [ + { + "name": "sum", + "type": "UInt64" + }, + { + "name": "avg", + "type": "Float64" + } + ], + + "data": + { + "sum": ["1", "2", "3", "4"], + "avg": [1, 2, 3, 2] + }, + + "totals": + { + "sum": "10", + "avg": 2 + }, + + "extremes": + { + "min": + { + "sum": "1", + "avg": 1 + }, + "max": + { + "sum": "4", + "avg": 3 + } + }, + + "rows": 4, + + "statistics": + { + "rows_read": 5, + "bytes_read": 20 + } +} +JSONColumnsWithMetadata 3 rows +{ + "meta": + [ + { + "name": "sum", + "type": "UInt64" + }, + { + "name": "avg", + "type": "Float64" + } + ], + + "data": + { + "sum": ["1", "2", "3", "4"], + "avg": [1, 2, 3, 2] + }, + + "totals": + { + "sum": "10", + "avg": 2 + }, + + "extremes": + { + "min": + { + "sum": "1", + "avg": 1 + }, + "max": + { + "sum": "4", + "avg": 3 + } + }, + + "rows": 4, + + "statistics": + { + "rows_read": 5, + "bytes_read": 
20 + } +} +b Nullable(Float64) +a Nullable(Float64) +c Nullable(Float64) +d Nullable(String) +1 3 \N \N +2 2 \N \N +3 1 \N \N +\N \N 1 \N +\N \N 2 \N +\N \N 3 \N +\N \N \N String +OK +3 +2 +1 +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(String) +1 1 \N +2 2 \N +3 3 \N +1 \N \N +2 \N \N +3 \N \N +1 2 String +OK +OK diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh new file mode 100755 index 00000000000..d416cf6da5a --- /dev/null +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/data_02293 + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02293" +$CLICKHOUSE_CLIENT -q "create table test_02293 (a UInt32, b String, c Array(Tuple(Array(UInt32), String))) engine=Memory" +$CLICKHOUSE_CLIENT -q "insert into test_02293 select number, 'String', [(range(number % 3), 'String'), (range(number % 4), 'gnirtS')] from numbers(5) settings max_block_size=2" + +echo "JSONColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" + +echo "JSONColumnsMonoBlock" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, 
JSONColumns)" + +echo "JSONColumnsMonoBlock 3 rows" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" + +echo "JSONCompactColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" + +echo "JSONCompactColumnsMonoBlock" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" + +echo "JSONCompactColumnsMonoBlock 3 rows" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" + +echo "JSONColumnsWithMetadata" +$CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed" +echo "JSONColumnsWithMetadata 3 
rows" +$CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata settings output_format_json_columns_max_rows_to_buffer=3" --extremes=1 | grep -v "elapsed" + + +echo ' +{ + "b": [1, 2, 3], + "a": [3, 2, 1] +} +{ + "c": [1, 2, 3] +} +{ +} +{ + "a": [], + "d": [] +} +{ + "d": ["String"] +} +' > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1" + +echo ' +[ + [1, 2, 3], + [1, 2, 3] +] +[ + [1, 2, 3] +] +[ +] +[ + [], + [] +] +[ + [1], + [2], + ["String"] +] +' > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' + +echo ' +{ + "a": [null, null, null], + "b": [3, 2, 1] +} +{ + "a": [1, 2, 3] +} +' > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' + + From 9c5ad1e7736db7239da185bc30c605f25b4a9c01 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 6 May 2022 16:51:50 +0000 Subject: [PATCH 054/615] Add docs --- docs/en/interfaces/formats.md | 100 ++++++++++++++++++++++++ docs/en/operations/settings/settings.md | 6 ++ 2 files changed, 106 insertions(+) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f21858ccc25..d4a68ca72fa 100644 --- 
a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -30,8 +30,13 @@ The supported formats are: | [JSON](#json) | ✗ | ✔ | | [JSONAsString](#jsonasstring) | ✔ | ✗ | | [JSONStrings](#jsonstrings) | ✗ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsMonoBlock](#jsoncolumnsmonoblock) | ✗ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | | [JSONCompact](#jsoncompact) | ✗ | ✔ | | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONCompactColumnsMonoBlock](#jsoncompactcolumnsmonoblock) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | | [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | @@ -565,6 +570,82 @@ Example: } ``` +## JSONColumns {#jsoncolumns} + +In this format, each block of data is represented as a JSON Object: + +```json +{ + "name1": [1, 2, 3, 4], + "name2": ["Hello", ",", "world", "!"], + "name3": [[1, 2], [3, 4], [5, 6], [7, 8]] +} +``` + +Columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here). + +## JSONColumnsMonoBlock {#jsoncolumnsmonoblock} + +Differs from JSONColumns in that it buffers up to [output_format_json_columns_max_rows_to_buffer](../operations/settings/settings.md#output-format-json-columns-max-rows-to-buffer) +rows and then outputs them as a single block. + +## JSONColumnsWithMetadata {#jsoncolumnswithmetadata} + +Differs from JSON output format in that it outputs columns as in JSONColumns format.
This format buffers up to [output_format_json_columns_max_rows_to_buffer](../operations/settings/settings.md#output-format-json-columns-max-rows-to-buffer) +rows and then outputs them as a single block. + +```json +{ + "meta": + [ + { + "name": "sum", + "type": "UInt64" + }, + { + "name": "avg", + "type": "Float64" + } + ], + + "data": + { + "sum": ["1", "2", "3", "4"], + "avg": [1, 2, 3, 2] + }, + + "totals": + { + "sum": "10", + "avg": 2 + }, + + "extremes": + { + "min": + { + "sum": "1", + "avg": 1 + }, + "max": + { + "sum": "4", + "avg": 3 + } + }, + + "rows": 4, + + "statistics": + { + "elapsed": 0.003701718, + "rows_read": 5, + "bytes_read": 20 + } +} +``` + ## JSONAsString {#jsonasstring} In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSONs. @@ -683,6 +764,25 @@ Example: } ``` +## JSONCompactColumns {#jsoncompactcolumns} + +In this format, each block of data is represented as a JSON array of arrays: + +```json +[ + [1, 2, 3, 4], + ["Hello", ",", "world", "!"], + [[1, 2], [3, 4], [5, 6], [7, 8]] +] +``` + +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here). + +## JSONCompactColumnsMonoBlock {#jsoncompactcolumnsmonoblock} + +Differs from JSONCompactColumns in that it buffers up to [output_format_json_columns_max_rows_to_buffer](../operations/settings/settings.md#output-format-json-columns-max-rows-to-buffer) +rows and then outputs them as a single block.
+ ## JSONEachRow {#jsoneachrow} ## JSONStringsEachRow {#jsonstringseachrow} ## JSONCompactEachRow {#jsoncompacteachrow} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8f2b9bc86fc..ef52ab415bf 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3784,6 +3784,12 @@ Possible values: Default value: `0`. +## output_format_json_columns_max_rows_to_buffer {#output-format-json-columns-max-rows-to-buffer} + +The maximum rows to buffer in formats JSONColumnsMonoBlock/JSONCompactColumnsMonoBlock/JSONColumnsWithMetadata + +Default value: `10000`. + ## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries. From c4b357d5cb7efd8eccc93c02de912cd0b5c3aec8 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 7 May 2022 08:34:52 +0000 Subject: [PATCH 055/615] update --- src/Storages/WindowView/StorageWindowView.cpp | 47 +++++++++---------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 85ea9ee214e..888e78236ca 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -1208,34 +1209,30 @@ BlockIO StorageWindowView::populate() throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "POPULATE is not supported when using function now() as the time column"); - auto modified_query = select_query->clone(); - auto & modified_select = modified_query->as(); - - auto analyzer_res = TreeRewriterResult({}); - removeJoin(modified_select, analyzer_res, getContext()); - - modified_select.setExpression(ASTSelectQuery::Expression::HAVING, {}); - 
modified_select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); - - auto select = std::make_shared(); - select->children.push_back(std::make_shared()); - modified_select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(select)); - - auto order_by = std::make_shared(); - auto order_by_elem = std::make_shared(); - order_by_elem->children.push_back(std::make_shared(timestamp_column_name)); - order_by_elem->direction = 1; - order_by->children.push_back(order_by_elem); - modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_by)); - QueryPipelineBuilder pipeline; - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - InterpreterSelectQuery interpreter_select{modified_query, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; - pipeline = interpreter_select.buildQueryPipeline(); + InterpreterSelectQuery interpreter_fetch{select_query, getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)}; + pipeline = interpreter_fetch.buildQueryPipeline(); - auto header_block = interpreter_select.getSampleBlock(); - auto sink = std::make_shared(header_block, *this, nullptr, getContext()); + SortDescription order_descr; + order_descr.emplace_back(timestamp_column_name); + + pipeline.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared( + header, + order_descr, + getContext()->getSettingsRef().max_block_size, + 0 /*LIMIT*/, + getContext()->getSettingsRef().max_bytes_before_remerge_sort, + getContext()->getSettingsRef().remerge_sort_lowered_memory_bytes_ratio, + getContext()->getSettingsRef().max_bytes_before_external_sort, + getContext()->getTemporaryVolume(), + getContext()->getSettingsRef().min_free_disk_space_for_temporary_data); + }); + + auto sink = std::make_shared(interpreter_fetch.getSampleBlock(), *this, nullptr, getContext()); BlockIO res; From c16ce7657ec2daf7eb0699096e909193689143a3 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Fri, 6 
May 2022 21:25:20 -0400 Subject: [PATCH 056/615] add hashid support --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/hashidsxx | 1 + contrib/hashidsxx-cmake/CMakeLists.txt | 21 +++ src/Functions/CMakeLists.txt | 4 + src/Functions/FunctionHashID.cpp | 13 ++ src/Functions/FunctionHashID.h | 153 ++++++++++++++++++ src/Functions/registerFunctions.cpp | 2 + src/configure_config.cmake | 3 + .../0_stateless/02293_hashid.reference | 5 + tests/queries/0_stateless/02293_hashid.sql | 1 + .../02293_hashid_arguments.reference | 5 + .../0_stateless/02293_hashid_arguments.sql | 1 + .../0_stateless/02293_hashid_const.reference | 1 + .../0_stateless/02293_hashid_const.sql | 1 + 15 files changed, 215 insertions(+) create mode 160000 contrib/hashidsxx create mode 100644 contrib/hashidsxx-cmake/CMakeLists.txt create mode 100644 src/Functions/FunctionHashID.cpp create mode 100644 src/Functions/FunctionHashID.h create mode 100644 tests/queries/0_stateless/02293_hashid.reference create mode 100644 tests/queries/0_stateless/02293_hashid.sql create mode 100644 tests/queries/0_stateless/02293_hashid_arguments.reference create mode 100644 tests/queries/0_stateless/02293_hashid_arguments.sql create mode 100644 tests/queries/0_stateless/02293_hashid_const.reference create mode 100644 tests/queries/0_stateless/02293_hashid_const.sql diff --git a/.gitmodules b/.gitmodules index 6c9e66f9cbc..0972ab6a88a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -262,3 +262,6 @@ [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng +[submodule "contrib/hashidsxx"] + path = contrib/hashidsxx + url = https://github.com/schoentoon/hashidsxx.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1f03c0fd341..627885c67b3 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -139,6 +139,7 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib 
(datasketches-cpp-cmake datasketches-cpp) +add_contrib (hashidsxx-cmake hashidsxx) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) diff --git a/contrib/hashidsxx b/contrib/hashidsxx new file mode 160000 index 00000000000..783f6911ccf --- /dev/null +++ b/contrib/hashidsxx @@ -0,0 +1 @@ +Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f916355251d --- /dev/null +++ b/contrib/hashidsxx-cmake/CMakeLists.txt @@ -0,0 +1,21 @@ +option(ENABLE_HASHIDSXX "Enable hashidsxx" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_HASHIDSXX) + message(STATUS "Not using hashidsxx") + return() +endif() + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") + +set (SRCS + "${LIBRARY_DIR}/hashids.cpp" +) + +set (HDRS + "${LIBRARY_DIR}/hashids.h" +) + +add_library(_hashidsxx ${SRCS} ${HDRS}) +target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::hashidsxx ALIAS _hashidsxx) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index debe7fac8a5..a982ee367de 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -96,6 +96,10 @@ if (TARGET ch_contrib::rapidjson) target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson) endif() +if (TARGET ch_contrib::hashidsxx) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hashidsxx) +endif() + add_subdirectory(GatherUtils) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils) diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp new file mode 100644 index 00000000000..14e0c7c35f3 --- /dev/null +++ b/src/Functions/FunctionHashID.cpp @@ -0,0 +1,13 @@ +#include "FunctionHashID.h" + +#include + + +namespace DB +{ + +void registerFunctionHashID(FunctionFactory & factory) +{ + factory.registerFunction(); +} 
+} diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h new file mode 100644 index 00000000000..3443b6d8408 --- /dev/null +++ b/src/Functions/FunctionHashID.h @@ -0,0 +1,153 @@ +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +} + +// hashid(string, salt) +class FunctionHashID : public IFunction +{ +public: + static constexpr auto name = "hashid"; + + static FunctionPtr create(ContextPtr) { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 0; } + + bool isVariadic() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() < 1) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName()); + + if (!isUnsignedInteger(arguments[0].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument of function {} must be unsigned integer, got {}", getName(), arguments[0].type->getName()); + + if (arguments.size() > 1) + { + if (!isString(arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {} must be String, got {}", + getName(), arguments[1].type->getName()); + } + + if (arguments.size() > 2) + { + if (!isUInt8(arguments[2].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument of function {} must be UInt8, got {}", + getName(), arguments[2].type->getName()); + } + + if 
(arguments.size() > 3) + { + if (!isString(arguments[3].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Fourth argument of function {} must be String, got {}", + getName(), arguments[3].type->getName()); + } + + if (arguments.size() > 4) + { + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Function {} expect no more than three arguments (integer, salt, optional_alphabet), got {}", + getName(), arguments.size()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & numcolumn = arguments[0].column; + + if ( + checkAndGetColumn(numcolumn.get()) + || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) + ) + { + std::string salt; + UInt8 minLength = 0; + std::string alphabet(DEFAULT_ALPHABET); + + if (arguments.size() >= 4) + { + const auto & alphabetcolumn = arguments[3].column; + if (auto alpha_col = checkAndGetColumnConst(alphabetcolumn.get())) + alphabet = alpha_col->getValue(); + } + + if (arguments.size() >= 3) + { + const auto & minlengthcolumn = arguments[2].column; + if (auto min_length_col = checkAndGetColumnConst(minlengthcolumn.get())) + minLength = min_length_col->getValue(); + } + + if (arguments.size() >= 2) + { + const auto & saltcolumn = arguments[1].column; + if (auto salt_col = checkAndGetColumnConst(saltcolumn.get())) + salt = salt_col->getValue(); + } + + hashidsxx::Hashids hash(salt, minLength, alphabet); + + auto col_res = ColumnString::create(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + std::string hashid = hash.encode({ numcolumn->getUInt(i) }); + col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1); + } + + 
return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of first argument of function hashid", + ErrorCodes::ILLEGAL_COLUMN); + + + } +}; + +} diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 6b3c6e92945..2472b78cbcd 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -24,6 +24,7 @@ void registerFunctionsEmbeddedDictionaries(FunctionFactory &); void registerFunctionsExternalDictionaries(FunctionFactory &); void registerFunctionsExternalModels(FunctionFactory &); void registerFunctionsFormatting(FunctionFactory &); +void registerFunctionHashID(FunctionFactory &); void registerFunctionsHashing(FunctionFactory &); void registerFunctionsHigherOrder(FunctionFactory &); void registerFunctionsLogical(FunctionFactory &); @@ -90,6 +91,7 @@ void registerFunctions() registerFunctionsExternalDictionaries(factory); registerFunctionsExternalModels(factory); registerFunctionsFormatting(factory); + registerFunctionHashID(factory); registerFunctionsHashing(factory); registerFunctionsHigherOrder(factory); registerFunctionsLogical(factory); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 519307ba28a..aa1419c7792 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -100,3 +100,6 @@ endif() if (TARGET ch_contrib::jemalloc) set(USE_JEMALLOC 1) endif() +if (TARGET ch_contrib::hashidsxx) + set(USE_HASHIDSXX 1) +endif() diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference new file mode 100644 index 00000000000..05023857670 --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid.reference @@ -0,0 +1,5 @@ +0 gY +1 jR +2 k5 +3 l5 +4 mO diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql new file mode 100644 index 00000000000..51bed96c039 --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid.sql @@ 
-0,0 +1 @@ +select number, hashid(number) from system.numbers limit 5; diff --git a/tests/queries/0_stateless/02293_hashid_arguments.reference b/tests/queries/0_stateless/02293_hashid_arguments.reference new file mode 100644 index 00000000000..41f3b213cdb --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid_arguments.reference @@ -0,0 +1,5 @@ +0 pbgkmdljlpjoapne +1 akemglnjepjpodba +2 obmgndljgajpkeao +3 dldokmpjpgjgeanb +4 nkdlpgajngjnobme diff --git a/tests/queries/0_stateless/02293_hashid_arguments.sql b/tests/queries/0_stateless/02293_hashid_arguments.sql new file mode 100644 index 00000000000..f1cb3a144e7 --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid_arguments.sql @@ -0,0 +1 @@ +select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; diff --git a/tests/queries/0_stateless/02293_hashid_const.reference b/tests/queries/0_stateless/02293_hashid_const.reference new file mode 100644 index 00000000000..93bd202307e --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid_const.reference @@ -0,0 +1 @@ +YQrvD5XGvbx diff --git a/tests/queries/0_stateless/02293_hashid_const.sql b/tests/queries/0_stateless/02293_hashid_const.sql new file mode 100644 index 00000000000..b8308d3f55b --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid_const.sql @@ -0,0 +1 @@ +select hashid(1234567890123456, 's3cr3t'); From 34f1821eb932857144b8d35942fc4ea0a7b65087 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 8 May 2022 23:43:08 +0800 Subject: [PATCH 057/615] function now support for windowview populate --- src/Interpreters/InterpreterCreateQuery.cpp | 17 ++++---- src/Storages/WindowView/StorageWindowView.cpp | 40 +++++++++---------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 92687e864cf..c8784522207 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ 
-1436,9 +1436,17 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) { /// If the query is a CREATE SELECT, insert the data into the table. if (create.select && !create.attach - && !create.is_ordinary_view && !create.is_live_view && !create.is_window_view + && !create.is_ordinary_view && !create.is_live_view && (!create.is_materialized_view || create.is_populate)) { + if (create.is_window_view) + { + auto table = DatabaseCatalog::instance().getTable({create.getDatabase(), create.getTable(), create.uuid}, getContext()); + if (auto * window_view = typeid_cast(table.get())) + return window_view->populate(); + return {}; + } + auto insert = std::make_shared(); insert->table_id = {create.getDatabase(), create.getTable(), create.uuid}; insert->select = create.select->clone(); @@ -1446,13 +1454,6 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) return InterpreterInsertQuery(insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } - else if (create.select && !create.attach && create.is_window_view && create.is_populate) - { - auto table = DatabaseCatalog::instance().getTable({create.getDatabase(), create.getTable(), create.uuid}, getContext()); - if (auto * window_view = dynamic_cast(table.get())) - return window_view->populate(); - return {}; - } return {}; } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 97bdba90cfb..1321440f140 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1207,32 +1207,30 @@ private: BlockIO StorageWindowView::populate() { - if (is_time_column_func_now) - throw Exception( - ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "POPULATE is not supported when using function now() as the time column"); - QueryPipelineBuilder pipeline; InterpreterSelectQuery interpreter_fetch{select_query, getContext(), 
SelectQueryOptions(QueryProcessingStage::FetchColumns)}; pipeline = interpreter_fetch.buildQueryPipeline(); - SortDescription order_descr; - order_descr.emplace_back(timestamp_column_name); - - pipeline.addSimpleTransform( - [&](const Block & header) - { - return std::make_shared( - header, - order_descr, - getContext()->getSettingsRef().max_block_size, - 0 /*LIMIT*/, - getContext()->getSettingsRef().max_bytes_before_remerge_sort, - getContext()->getSettingsRef().remerge_sort_lowered_memory_bytes_ratio, - getContext()->getSettingsRef().max_bytes_before_external_sort, - getContext()->getTemporaryVolume(), - getContext()->getSettingsRef().min_free_disk_space_for_temporary_data); - }); + if (!is_time_column_func_now) + { + SortDescription order_descr; + order_descr.emplace_back(timestamp_column_name); + pipeline.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared( + header, + order_descr, + getContext()->getSettingsRef().max_block_size, + 0 /*LIMIT*/, + getContext()->getSettingsRef().max_bytes_before_remerge_sort, + getContext()->getSettingsRef().remerge_sort_lowered_memory_bytes_ratio, + getContext()->getSettingsRef().max_bytes_before_external_sort, + getContext()->getTemporaryVolume(), + getContext()->getSettingsRef().min_free_disk_space_for_temporary_data); + }); + } auto sink = std::make_shared(interpreter_fetch.getSampleBlock(), *this, nullptr, getContext()); From 64e4c546bcc078a2dcdae3d86981ab1720ce7c29 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 8 May 2022 16:47:22 +0000 Subject: [PATCH 058/615] add test for windowview populate --- ...view_proc_tumble_to_now_populate.reference | 1 + ...window_view_proc_tumble_to_now_populate.sh | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.reference create mode 100755 tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh diff --git 
a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.reference b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh new file mode 100755 index 00000000000..83c35779059 --- /dev/null +++ b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < Date: Sun, 8 May 2022 16:10:10 +0900 Subject: [PATCH 059/615] guard against hashid support being disabled --- src/Common/config.h.in | 1 + src/Functions/FunctionHashID.cpp | 4 ++++ src/Functions/FunctionHashID.h | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index d8d308c59bd..0715cc2335f 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -26,3 +26,4 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_REPLXX #cmakedefine01 USE_JEMALLOC +#cmakedefine01 USE_HASHIDSXX diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp index 14e0c7c35f3..5983af73391 100644 --- a/src/Functions/FunctionHashID.cpp +++ b/src/Functions/FunctionHashID.cpp @@ -1,5 +1,7 @@ #include "FunctionHashID.h" +#if USE_HASHIDSXX + #include @@ -11,3 +13,5 @@ void registerFunctionHashID(FunctionFactory & factory) factory.registerFunction(); } } + +#endif diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 3443b6d8408..6cd8e153a1f 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -2,6 +2,8 @@ #include +#if 
USE_HASHIDSXX + #include #include @@ -151,3 +153,5 @@ public: }; } + +#endif From e9f8114738ccb75f02e35a724c431ee648889130 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Mon, 9 May 2022 09:00:10 +0900 Subject: [PATCH 060/615] clean up std::string usage --- src/Functions/FunctionHashID.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 6cd8e153a1f..a1de8c012c7 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -108,7 +108,7 @@ public: { std::string salt; UInt8 minLength = 0; - std::string alphabet(DEFAULT_ALPHABET); + std::string alphabet; if (arguments.size() >= 4) { @@ -116,6 +116,8 @@ public: if (auto alpha_col = checkAndGetColumnConst(alphabetcolumn.get())) alphabet = alpha_col->getValue(); } + else + alphabet.assign(DEFAULT_ALPHABET); if (arguments.size() >= 3) { @@ -135,9 +137,11 @@ public: auto col_res = ColumnString::create(); + std::string hashid; + for (size_t i = 0; i < input_rows_count; ++i) { - std::string hashid = hash.encode({ numcolumn->getUInt(i) }); + hashid.assign(hash.encode({ numcolumn->getUInt(i) })); col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1); } From e87309ae8d3207dd4bd3fd16088a0d42f81f468c Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Mon, 9 May 2022 09:33:47 +0900 Subject: [PATCH 061/615] clang-format FunctionHashID --- src/Functions/FunctionHashID.h | 77 ++++++++++++++++------------------ 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index a1de8c012c7..a593d41a9a1 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -6,12 +6,12 @@ #include -#include -#include #include -#include +#include +#include #include #include +#include #include #include @@ -33,14 +33,9 @@ class FunctionHashID : public IFunction public: static constexpr auto name = "hashid"; - static FunctionPtr 
create(ContextPtr) { - return std::make_shared(); - } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -54,38 +49,49 @@ public: throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName()); if (!isUnsignedInteger(arguments[0].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument of function {} must be unsigned integer, got {}", getName(), arguments[0].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument of function {} must be unsigned integer, got {}", + getName(), + arguments[0].type->getName()); if (arguments.size() > 1) { if (!isString(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument of function {} must be String, got {}", - getName(), arguments[1].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {} must be String, got {}", + getName(), + arguments[1].type->getName()); } if (arguments.size() > 2) { if (!isUInt8(arguments[2].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function {} must be UInt8, got {}", - getName(), arguments[2].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument of function {} must be UInt8, got {}", + getName(), + arguments[2].type->getName()); } if (arguments.size() > 3) { if (!isString(arguments[3].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Fourth argument of function {} must be String, got {}", - getName(), arguments[3].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Fourth argument of function {} must be String, got {}", + getName(), + arguments[3].type->getName()); } if (arguments.size() > 
4) { - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, - "Function {} expect no more than three arguments (integer, salt, optional_alphabet), got {}", - getName(), arguments.size()); + throw Exception( + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Function {} expect no more than three arguments (integer, salt, optional_alphabet), got {}", + getName(), + arguments.size()); } return std::make_shared(); @@ -95,16 +101,10 @@ public: { const auto & numcolumn = arguments[0].column; - if ( - checkAndGetColumn(numcolumn.get()) - || checkAndGetColumn(numcolumn.get()) - || checkAndGetColumn(numcolumn.get()) - || checkAndGetColumn(numcolumn.get()) - || checkAndGetColumnConst(numcolumn.get()) - || checkAndGetColumnConst(numcolumn.get()) - || checkAndGetColumnConst(numcolumn.get()) - || checkAndGetColumnConst(numcolumn.get()) - ) + if (checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) || checkAndGetColumnConst(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) || checkAndGetColumnConst(numcolumn.get())) { std::string salt; UInt8 minLength = 0; @@ -141,18 +141,15 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { - hashid.assign(hash.encode({ numcolumn->getUInt(i) })); + hashid.assign(hash.encode({numcolumn->getUInt(i)})); col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1); } return col_res; } else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function hashid", - ErrorCodes::ILLEGAL_COLUMN); - - + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of first argument of function hashid", ErrorCodes::ILLEGAL_COLUMN); } }; From c4cf1c863ba64c0f21f5048146a25d3219591381 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 4 May 2022 06:26:38 +0000 Subject: [PATCH 062/615] Add precommit initial --- 
src/Coordination/KeeperServer.cpp | 37 ++++++++++++++++++++++++- src/Coordination/KeeperStateMachine.cpp | 7 +++++ src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStorage.h | 1 - src/Coordination/NodesAccessor.h | 0 5 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 src/Coordination/NodesAccessor.h diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 6961f31ed20..03d9d6ea373 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -487,7 +487,42 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ } if (initialized_flag) - return nuraft::cb_func::ReturnCode::Ok; + { + switch (type) + { + case nuraft::cb_func::PreAppendLog: + { + auto & entry = *static_cast(param->ctx); + ReadBufferFromNuraftBuffer buffer(entry->get_buf()); + KeeperStorage::RequestForSession request_for_session; + readIntBinary(request_for_session.session_id, buffer); + + int32_t length; + Coordination::read(length, buffer); + + int32_t xid; + Coordination::read(xid, buffer); + + Coordination::OpNum opnum; + + Coordination::read(opnum, buffer); + + request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum); + request_for_session.request->xid = xid; + request_for_session.request->readImpl(buffer); + + if (!buffer.eof()) + readIntBinary(request_for_session.time, buffer); + else /// backward compatibility + request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + + LOG_INFO(log, "Preappend for log type={} opnum={} xid={}", entry->get_val_type(), static_cast(opnum), xid); + return nuraft::cb_func::ReturnCode::Ok; + } + default: + return nuraft::cb_func::ReturnCode::Ok; + } + } size_t last_commited = state_machine->last_commit_index(); size_t next_index = state_manager->getLogStore()->next_slot(); diff --git a/src/Coordination/KeeperStateMachine.cpp 
b/src/Coordination/KeeperStateMachine.cpp index be7110fa841..492a760ae20 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -114,6 +114,13 @@ void KeeperStateMachine::init() storage = std::make_unique(coordination_settings->dead_session_check_period_ms.totalMilliseconds(), superdigest); } +nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) +{ + auto request_for_session = parseRequest(data); + LOG_WARNING(log, "Precommitting {}", log_idx); + return nullptr; +} + nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 73578e6a2ba..44a47c288e6 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -28,7 +28,7 @@ public: void init(); /// Currently not supported - nuraft::ptr pre_commit(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; } + nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index ccbddcf6e19..955c79512c9 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -172,7 +172,6 @@ public: void enableSnapshotMode(size_t up_to_version) { container.enableSnapshotMode(up_to_version); - } /// Turn off snapshot mode. 
diff --git a/src/Coordination/NodesAccessor.h b/src/Coordination/NodesAccessor.h new file mode 100644 index 00000000000..e69de29bb2d From 94ba3d129cdfe7e66d87f33949a5aff8f795ad03 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 5 May 2022 10:32:41 +0000 Subject: [PATCH 063/615] Define preprocess with updated nodes --- src/Coordination/KeeperServer.cpp | 33 ++--- src/Coordination/KeeperStateMachine.cpp | 14 ++ src/Coordination/KeeperStateMachine.h | 2 + src/Coordination/KeeperStorage.cpp | 121 +++++++++++++++++- src/Coordination/KeeperStorage.h | 33 +++++ .../WriteBufferFromNuraftBuffer.h | 1 - 6 files changed, 179 insertions(+), 25 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 03d9d6ea373..0ff390cead9 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -373,6 +373,7 @@ nuraft::ptr getZooKeeperLogEntry(int64_t session_id, int64_t tim DB::writeIntBinary(session_id, buf); request->write(buf); DB::writeIntBinary(time, buf); + DB::writeIntBinary(static_cast(0), buf); return buf.getBuffer(); } @@ -493,30 +494,16 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ case nuraft::cb_func::PreAppendLog: { auto & entry = *static_cast(param->ctx); - ReadBufferFromNuraftBuffer buffer(entry->get_buf()); - KeeperStorage::RequestForSession request_for_session; - readIntBinary(request_for_session.session_id, buffer); + auto log_store = state_manager->load_log_store(); + auto next_idx = log_store->next_slot(); + auto maybe_digest = state_machine->preprocess(next_idx, entry->get_buf()); + if (maybe_digest) + { + auto & buff = entry->get_buf(); + DB::WriteBuffer buf(reinterpret_cast(buff.data_begin() + buff.size() - sizeof(int64_t)), buff.size()); + DB::writeIntBinary(*maybe_digest, buf); + } - int32_t length; - Coordination::read(length, buffer); - - int32_t xid; - Coordination::read(xid, buffer); - - Coordination::OpNum opnum; - - 
Coordination::read(opnum, buffer); - - request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum); - request_for_session.request->xid = xid; - request_for_session.request->readImpl(buffer); - - if (!buffer.eof()) - readIntBinary(request_for_session.time, buffer); - else /// backward compatibility - request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); - - LOG_INFO(log, "Preappend for log type={} opnum={} xid={}", entry->get_val_type(), static_cast(opnum), xid); return nuraft::cb_func::ReturnCode::Ok; } default: diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 492a760ae20..ecb9e2cb667 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -44,6 +44,9 @@ namespace else /// backward compatibility request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + int64_t digest; + readIntBinary(digest, buffer); + LOG_INFO(&Poco::Logger::get("STORAGEEE"), "Read digest {}", digest); return request_for_session; } @@ -121,6 +124,17 @@ nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nur return nullptr; } +std::optional KeeperStateMachine::preprocess(const uint64_t log_idx, nuraft::buffer & data) +{ + auto request_for_session = parseRequest(data); + if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) + { + return std::nullopt; + } + std::lock_guard lock(storage_and_responses_lock); + return storage->preprocessRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, log_idx); +} + nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 44a47c288e6..ae9cc0fe1a0 100644 --- 
a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -27,6 +27,8 @@ public: /// Read state from the latest snapshot void init(); + std::optional preprocess(uint64_t log_idx, nuraft::buffer & data); + /// Currently not supported nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index f58776cf843..d1c9aa3c16e 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1,4 +1,5 @@ #include +#include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include #include @@ -204,6 +205,40 @@ KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_) container.insert("/", Node()); } +KeeperStorage::CurrentNodePtr KeeperStorage::CurrentNodes::getNode(const std::string &path) +{ + if (auto maybe_node_it = updated_nodes.find(path); maybe_node_it != updated_nodes.end()) + { + return maybe_node_it->second; + } + + if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) + { + return std::make_shared(-1, maybe_node_it->value.stat, maybe_node_it->value.getData(), maybe_node_it->value.seq_num); + } + + return nullptr; +} + +bool KeeperStorage::CurrentNodes::hasNode(const std::string & path) const +{ + return updated_nodes.contains(path) || storage.container.contains(path); +} + +void KeeperStorage::CurrentNodes::insertNode(const std::string & path, const CurrentNodePtr & new_node) +{ + if (auto maybe_node_it = updated_nodes.find(path); maybe_node_it != updated_nodes.end()) + { + if (maybe_node_it->second == new_node) + return; + + maybe_node_it->second = new_node; + return; + } + + updated_nodes.emplace(path, new_node); +} + using Undo = std::function; struct KeeperStorageRequestProcessor @@ -214,6 +249,7 @@ struct KeeperStorageRequestProcessor : zk_request(zk_request_) {} virtual std::pair process(KeeperStorage & storage, int64_t zxid, int64_t 
session_id, int64_t time) const = 0; + virtual void preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const {} virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; } virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; } @@ -268,6 +304,75 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr return checkACL(Coordination::ACL::Create, node_acls, session_auths); } + void preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); + + auto parent_path = parentPath(request.path); + auto parent_node = storage.current_nodes.getNode(std::string{parent_path}); + if (parent_node == nullptr) + { + LOG_INFO(&Poco::Logger::get("Storage"), "Node not found"); + return; + } + else if (parent_node->stat.ephemeralOwner != 0) + { + LOG_INFO(&Poco::Logger::get("Storage"), "Ephemerals cannot have children"); + return; + } + + std::string path_created = request.path; + if (request.is_sequential) + { + auto seq_num = parent_node->seq_num; + + std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + seq_num_str.exceptions(std::ios::failbit); + seq_num_str << std::setw(10) << std::setfill('0') << seq_num; + + path_created += seq_num_str.str(); + } + + if (storage.current_nodes.hasNode(path_created)) + { + LOG_INFO(&Poco::Logger::get("Storage"), "Node exists"); + return; + } + + if (getBaseName(path_created).size == 0) + { + LOG_INFO(&Poco::Logger::get("Storage"), "Invalid path"); + return; + } + + KeeperStorage::CurrentNodePtr created_node = std::make_shared(zxid, Coordination::Stat{}, request.data, 0); + created_node->stat.czxid = zxid; + created_node->stat.mzxid = zxid; + created_node->stat.pzxid = zxid; + 
created_node->stat.ctime = time; + created_node->stat.mtime = time; + created_node->stat.numChildren = 0; + created_node->stat.dataLength = request.data.length(); + created_node->stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; + + storage.current_nodes.insertNode(path_created, created_node); + + int32_t parent_cversion = request.parent_cversion; + + /// Increment sequential number even if node is not sequential + ++parent_node->seq_num; + + if (parent_cversion == -1) + ++parent_node->stat.cversion; + else if (parent_cversion > parent_node->stat.cversion) + parent_node->stat.cversion = parent_cversion; + + if (zxid > parent_node->stat.pzxid) + parent_node->stat.pzxid = zxid; + ++parent_node->stat.numChildren; + storage.current_nodes.insertNode(std::string{parent_path}, parent_node); + } + std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { auto & container = storage.container; @@ -995,7 +1100,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc KeeperStorage::AuthID auth{auth_request.scheme, digest}; auto & session_ids = sessions_and_auth[session_id]; if (std::find(session_ids.begin(), session_ids.end(), auth) == session_ids.end()) - sessions_and_auth[session_id].emplace_back(auth); + session_ids.emplace_back(auth); } } @@ -1084,6 +1189,20 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() } +std::optional KeeperStorage::preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid) +{ + KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(request); + request_processor->preprocess(*this, session_id, time, new_last_zxid); + switch (request->getOpNum()) + { + using enum Coordination::OpNum; + case Create: + return new_last_zxid; + default: + return std::nullopt; + } +} + KeeperStorage::ResponsesForSessions 
KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl) { KeeperStorage::ResponsesForSessions results; diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 955c79512c9..23fdd0fbb0f 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -112,6 +112,37 @@ public: /// container. Container container; + struct CurrentNode + { + CurrentNode(int64_t zxid_, Coordination::Stat stat_, String data_, int32_t seq_num_) + : zxid(zxid_) + , stat(stat_) + , seq_num(seq_num_) + , data(std::move(data_)) + {} + + int64_t zxid; + Coordination::Stat stat{}; + int32_t seq_num{0}; + String data; + }; + + using CurrentNodePtr = std::shared_ptr; + + struct CurrentNodes + { + explicit CurrentNodes(KeeperStorage & storage_) : storage(storage_) {} + + CurrentNodePtr getNode(const std::string & path); + bool hasNode(const std::string & path) const; + void insertNode(const std::string & path, const CurrentNodePtr & new_node); + + std::unordered_map> updated_nodes; + KeeperStorage & storage; + }; + + CurrentNodes current_nodes{*this}; + /// Mapping session_id -> set of ephemeral nodes paths Ephemerals ephemerals; /// Mapping session_id -> set of watched nodes paths @@ -127,6 +158,7 @@ public: /// Global id of all requests applied to storage int64_t zxid{0}; bool finalized{false}; + int64_t last_committed_zxid{0}; /// Currently active watches (node_path -> subscribed sessions) Watches watches; @@ -163,6 +195,7 @@ public: /// Process user request and return response. /// check_acl = false only when converting data from ZooKeeper. 
ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl = true); + std::optional preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid); void finalize(); diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.h b/src/Coordination/WriteBufferFromNuraftBuffer.h index d52049edcff..c9ca1e2a227 100644 --- a/src/Coordination/WriteBufferFromNuraftBuffer.h +++ b/src/Coordination/WriteBufferFromNuraftBuffer.h @@ -12,7 +12,6 @@ public: WriteBufferFromNuraftBuffer(); nuraft::ptr getBuffer(); - bool isFinished() const { return finalized; } ~WriteBufferFromNuraftBuffer() override; From cff68aa31fce7a646a30308035ebfb4033b2fead Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 6 May 2022 12:25:25 +0000 Subject: [PATCH 064/615] Precommit implementation --- src/Coordination/KeeperServer.cpp | 17 + src/Coordination/KeeperStateMachine.cpp | 6 +- src/Coordination/KeeperStorage.cpp | 1233 +++++++++++++---------- src/Coordination/KeeperStorage.h | 81 +- 4 files changed, 790 insertions(+), 547 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 0ff390cead9..0e54ba02139 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -315,6 +316,22 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items); + auto log_store = state_manager->load_log_store(); + auto next_log_idx = log_store->next_slot(); + if (next_log_idx > 0 && next_log_idx > state_machine->last_commit_index()) + { + auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, next_log_idx); + + auto idx = 
state_machine->last_commit_index() + 1; + for (const auto & entry : *log_entries) + { + if (entry && entry->get_val_type() == nuraft::log_val_type::app_log) + state_machine->preprocess(idx, entry->get_buf()); + + ++idx; + } + } + loadLatestConfig(); last_local_config = state_manager->parseServersConfiguration(config, true).cluster_config; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index ecb9e2cb667..00890aa9fd1 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -119,18 +119,16 @@ void KeeperStateMachine::init() nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) { - auto request_for_session = parseRequest(data); - LOG_WARNING(log, "Precommitting {}", log_idx); + preprocess(log_idx, data); return nullptr; } std::optional KeeperStateMachine::preprocess(const uint64_t log_idx, nuraft::buffer & data) { + LOG_INFO(&Poco::Logger::get("Storageeee"), "Preprocess called"); auto request_for_session = parseRequest(data); if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) - { return std::nullopt; - } std::lock_guard lock(storage_and_responses_lock); return storage->preprocessRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, log_idx); } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index d1c9aa3c16e..8add31629e6 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1,20 +1,21 @@ -#include -#include "Common/ZooKeeper/ZooKeeperConstants.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include +#include +#include #include +#include namespace DB { @@ 
-28,156 +29,158 @@ namespace ErrorCodes namespace { -String base64Encode(const String & decoded) -{ - std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - ostr.exceptions(std::ios::failbit); - Poco::Base64Encoder encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << decoded; - encoder.close(); - return ostr.str(); -} - -String getSHA1(const String & userdata) -{ - Poco::SHA1Engine engine; - engine.update(userdata); - const auto & digest_id = engine.digest(); - return String{digest_id.begin(), digest_id.end()}; -} - -String generateDigest(const String & userdata) -{ - std::vector user_password; - boost::split(user_password, userdata, [](char c) { return c == ':'; }); - return user_password[0] + ":" + base64Encode(getSHA1(userdata)); -} - -bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) -{ - if (node_acls.empty()) - return true; - - for (const auto & session_auth : session_auths) - if (session_auth.scheme == "super") - return true; - - for (const auto & node_acl : node_acls) + String base64Encode(const String & decoded) { - if (node_acl.permissions & permission) - { - if (node_acl.scheme == "world" && node_acl.id == "anyone") - return true; - - for (const auto & session_auth : session_auths) - { - if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) - return true; - } - } + std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + ostr.exceptions(std::ios::failbit); + Poco::Base64Encoder encoder(ostr); + encoder.rdbuf()->setLineLength(0); + encoder << decoded; + encoder.close(); + return ostr.str(); } - return false; -} - -bool fixupACL( - const std::vector & request_acls, - const std::vector & current_ids, - std::vector & result_acls) -{ - if (request_acls.empty()) - return true; - - bool valid_found = false; - for (const auto & request_acl : request_acls) + String getSHA1(const String & userdata) { - if (request_acl.scheme == "auth") + 
Poco::SHA1Engine engine; + engine.update(userdata); + const auto & digest_id = engine.digest(); + return String{digest_id.begin(), digest_id.end()}; + } + + String generateDigest(const String & userdata) + { + std::vector user_password; + boost::split(user_password, userdata, [](char c) { return c == ':'; }); + return user_password[0] + ":" + base64Encode(getSHA1(userdata)); + } + + bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) + { + if (node_acls.empty()) + return true; + + for (const auto & session_auth : session_auths) + if (session_auth.scheme == "super") + return true; + + for (const auto & node_acl : node_acls) { - for (const auto & current_id : current_ids) + if (node_acl.permissions & permission) { + if (node_acl.scheme == "world" && node_acl.id == "anyone") + return true; + + for (const auto & session_auth : session_auths) + { + if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) + return true; + } + } + } + + return false; + } + + bool fixupACL( + const std::vector & request_acls, + const std::vector & current_ids, + std::vector & result_acls) + { + if (request_acls.empty()) + return true; + + bool valid_found = false; + for (const auto & request_acl : request_acls) + { + if (request_acl.scheme == "auth") + { + for (const auto & current_id : current_ids) + { + valid_found = true; + Coordination::ACL new_acl = request_acl; + new_acl.scheme = current_id.scheme; + new_acl.id = current_id.id; + result_acls.push_back(new_acl); + } + } + else if (request_acl.scheme == "world" && request_acl.id == "anyone") + { + /// We don't need to save default ACLs valid_found = true; + } + else if (request_acl.scheme == "digest") + { Coordination::ACL new_acl = request_acl; - new_acl.scheme = current_id.scheme; - new_acl.id = current_id.id; + + /// Bad auth + if (std::count(new_acl.id.begin(), new_acl.id.end(), ':') != 1) + return false; + + valid_found = true; 
result_acls.push_back(new_acl); } } - else if (request_acl.scheme == "world" && request_acl.id == "anyone") + return valid_found; + } + + KeeperStorage::ResponsesForSessions processWatchesImpl( + const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) + { + KeeperStorage::ResponsesForSessions result; + auto it = watches.find(path); + if (it != watches.end()) { - /// We don't need to save default ACLs - valid_found = true; - } - else if (request_acl.scheme == "digest") - { - Coordination::ACL new_acl = request_acl; - - /// Bad auth - if (std::count(new_acl.id.begin(), new_acl.id.end(), ':') != 1) - return false; - - valid_found = true; - result_acls.push_back(new_acl); - } - } - return valid_found; -} - -KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) -{ - KeeperStorage::ResponsesForSessions result; - auto it = watches.find(path); - if (it != watches.end()) - { - std::shared_ptr watch_response = std::make_shared(); - watch_response->path = path; - watch_response->xid = Coordination::WATCH_XID; - watch_response->zxid = -1; - watch_response->type = event_type; - watch_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); - - watches.erase(it); - } - - auto parent_path = parentPath(path); - - Strings paths_to_check_for_list_watches; - if (event_type == Coordination::Event::CREATED) - { - paths_to_check_for_list_watches.push_back(parent_path.toString()); /// Trigger list watches for parent - } - else if (event_type == Coordination::Event::DELETED) - { - paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path - paths_to_check_for_list_watches.push_back(parent_path.toString()); /// And for parent path - } - /// 
CHANGED event never trigger list wathes - - for (const auto & path_to_check : paths_to_check_for_list_watches) - { - it = list_watches.find(path_to_check); - if (it != list_watches.end()) - { - std::shared_ptr watch_list_response = std::make_shared(); - watch_list_response->path = path_to_check; - watch_list_response->xid = Coordination::WATCH_XID; - watch_list_response->zxid = -1; - if (path_to_check == parent_path) - watch_list_response->type = Coordination::Event::CHILD; - else - watch_list_response->type = Coordination::Event::DELETED; - - watch_list_response->state = Coordination::State::CONNECTED; + std::shared_ptr watch_response = std::make_shared(); + watch_response->path = path; + watch_response->xid = Coordination::WATCH_XID; + watch_response->zxid = -1; + watch_response->type = event_type; + watch_response->state = Coordination::State::CONNECTED; for (auto watcher_session : it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); - list_watches.erase(it); + watches.erase(it); } + + auto parent_path = parentPath(path); + + Strings paths_to_check_for_list_watches; + if (event_type == Coordination::Event::CREATED) + { + paths_to_check_for_list_watches.push_back(parent_path.toString()); /// Trigger list watches for parent + } + else if (event_type == Coordination::Event::DELETED) + { + paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path + paths_to_check_for_list_watches.push_back(parent_path.toString()); /// And for parent path + } + /// CHANGED event never trigger list wathes + + for (const auto & path_to_check : paths_to_check_for_list_watches) + { + it = list_watches.find(path_to_check); + if (it != list_watches.end()) + { + std::shared_ptr watch_list_response + = std::make_shared(); + watch_list_response->path = path_to_check; + watch_list_response->xid = Coordination::WATCH_XID; 
+ watch_list_response->zxid = -1; + if (path_to_check == parent_path) + watch_list_response->type = Coordination::Event::CHILD; + else + watch_list_response->type = Coordination::Event::DELETED; + + watch_list_response->state = Coordination::State::CONNECTED; + for (auto watcher_session : it->second) + result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + + list_watches.erase(it); + } + } + return result; } - return result; -} } void KeeperStorage::Node::setData(String new_data) @@ -199,58 +202,243 @@ void KeeperStorage::Node::removeChild(StringRef child_path) } KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_) - : session_expiry_queue(tick_time_ms) - , superdigest(superdigest_) + : session_expiry_queue(tick_time_ms), superdigest(superdigest_) { container.insert("/", Node()); } -KeeperStorage::CurrentNodePtr KeeperStorage::CurrentNodes::getNode(const std::string &path) +template +struct Overloaded : Ts... { - if (auto maybe_node_it = updated_nodes.find(path); maybe_node_it != updated_nodes.end()) - { - return maybe_node_it->second; - } + using Ts::operator()...; +}; +template +Overloaded(Ts...) 
-> Overloaded; + +std::shared_ptr KeeperStorage::CurrentNodes::getNode(StringRef path) +{ + std::shared_ptr node{nullptr}; if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) { - return std::make_shared(-1, maybe_node_it->value.stat, maybe_node_it->value.getData(), maybe_node_it->value.seq_num); + const auto & committed_node = maybe_node_it->value; + node = std::make_shared(); + node->stat = committed_node.stat; } - return nullptr; + applyDeltas( + path, + Overloaded{ + [&](const CreateNodeDelta & create_delta) + { + assert(!node); + node = std::make_shared(); + node->stat = create_delta.stat; + node->setData(create_delta.data); + }, + [&](const RemoveNodeDelta & /*remove_delta*/) + { + assert(node); + node = nullptr; + }, + [&](auto && /*delta*/) {}, + }); + + return node; } -bool KeeperStorage::CurrentNodes::hasNode(const std::string & path) const +bool KeeperStorage::CurrentNodes::hasNode(StringRef path) const { - return updated_nodes.contains(path) || storage.container.contains(path); + bool exists = storage.container.contains(std::string{path}); + applyDeltas( + path, + Overloaded{ + [&](const CreateNodeDelta & /*create_delta*/) + { + assert(!exists); + exists = true; + }, + [&](const RemoveNodeDelta & /*remove_delta*/) + { + assert(exists); + exists = false; + }, + [&](auto && /*delta*/) {}, + }); + + return exists; } -void KeeperStorage::CurrentNodes::insertNode(const std::string & path, const CurrentNodePtr & new_node) +namespace { - if (auto maybe_node_it = updated_nodes.find(path); maybe_node_it != updated_nodes.end()) + [[noreturn]] void fail() { - if (maybe_node_it->second == new_node) - return; + LOG_INFO(&Poco::Logger::get("KeeperStorage"), "Inconsistency found, terminating"); + std::terminate(); + } +} - maybe_node_it->second = new_node; - return; +Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_id) +{ + for (auto & delta : current_nodes.deltas) + { + if (delta.zxid > 
commit_zxid) + break; + + auto result = std::visit( + Overloaded{ + [&, &path = delta.path](KeeperStorage::CreateNodeDelta & create_delta) + { + if (!createNode( + path, + std::move(create_delta.data), + create_delta.stat, + create_delta.is_sequental, + create_delta.is_ephemeral, + std::move(create_delta.acls), + session_id)) + fail(); + + return Coordination::Error::ZOK; + }, + [&, &path = delta.path](KeeperStorage::UpdateNodeDelta & update_delta) + { + auto it = container.find(path); + if (it == container.end()) + fail(); + + if (update_delta.version != -1 && update_delta.version != it->value.stat.version) + fail(); + + container.updateValue(path, update_delta.update_fn); + return Coordination::Error::ZOK; + }, + [&, &path = delta.path](KeeperStorage::RemoveNodeDelta & remove_delta) + { + if (!removeNode(path, remove_delta.version)) + fail(); + + return Coordination::Error::ZOK; + }, + [&, &path = delta.path](KeeperStorage::SetACLDelta & acl_delta) + { + auto it = container.find(path); + if (it != container.end()) + fail(); + + if (acl_delta.version != -1 && acl_delta.version != it->value.stat.aversion) + fail(); + + acl_map.removeUsage(it->value.acl_id); + + uint64_t acl_id = acl_map.convertACLs(acl_delta.acls); + acl_map.addUsage(acl_id); + + container.updateValue(path, [acl_id](KeeperStorage::Node & node) { node.acl_id = acl_id; }); + + return Coordination::Error::ZOK; + }, + [&](KeeperStorage::ErrorDelta & error_delta) { return error_delta.error; }}, + delta.operation); + + if (result != Coordination::Error::ZOK) + return result; } - updated_nodes.emplace(path, new_node); + return Coordination::Error::ZOK; } +bool KeeperStorage::createNode( + const std::string & path, + String data, + const Coordination::Stat & stat, + bool is_sequental, + bool is_ephemeral, + Coordination::ACLs node_acls, + int64_t session_id) +{ + auto parent_path = parentPath(path); + auto it = container.find(parent_path); + + if (it == container.end()) + return false; + + if 
(it->value.stat.ephemeralOwner != 0) + return false; + + if (container.contains(path)) + return false; + + KeeperStorage::Node created_node; + + uint64_t acl_id = acl_map.convertACLs(node_acls); + acl_map.addUsage(acl_id); + + created_node.acl_id = acl_id; + created_node.stat = stat; + created_node.setData(std::move(data)); + created_node.is_sequental = is_sequental; + container.insert(path, created_node); + auto [map_key, _] = container.insert(path, created_node); + /// Take child path from key owned by map. + auto child_path = getBaseName(map_key->getKey()); + container.updateValue(parent_path, [child_path](KeeperStorage::Node & parent) { parent.addChild(child_path); }); + + if (is_ephemeral) + ephemerals[session_id].emplace(path); + + return true; +}; + +bool KeeperStorage::removeNode(const std::string & path, int32_t version) +{ + auto it = container.find(path); + if (it == container.end()) + return false; + + if (version != -1 && version != it->value.stat.version) + return false; + + if (it->value.stat.numChildren) + return false; + + auto prev_node = it->value; + if (prev_node.stat.ephemeralOwner != 0) + { + auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); + ephemerals_it->second.erase(path); + if (ephemerals_it->second.empty()) + ephemerals.erase(ephemerals_it); + } + + acl_map.removeUsage(prev_node.acl_id); + + container.updateValue( + parentPath(path), [child_basename = getBaseName(it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); }); + + container.erase(path); + return true; +} + + using Undo = std::function; struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; - explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) - : zk_request(zk_request_) - {} - virtual std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const = 0; - virtual void preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t 
/*session_id*/, int64_t /*time*/) const {} - virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; } + explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) : zk_request(zk_request_) { } + virtual Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const = 0; + virtual std::vector + preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const + { + return {}; + } + virtual KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const + { + return {}; + } virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; } virtual ~KeeperStorageRequestProcessor() = default; @@ -259,7 +447,8 @@ struct KeeperStorageRequestProcessor struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { return {zk_request->makeResponse(), {}}; } @@ -268,7 +457,8 @@ struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageReques struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* 
session_id */, int64_t /* time */) const override { auto response = zk_request->makeResponse(); dynamic_cast(*response).path @@ -277,11 +467,16 @@ struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProc } }; +namespace +{ +} + struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); } @@ -304,29 +499,26 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr return checkACL(Coordination::ACL::Create, node_acls, session_auths); } - void preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); + std::vector new_deltas; + auto parent_path = parentPath(request.path); auto parent_node = storage.current_nodes.getNode(std::string{parent_path}); if (parent_node == nullptr) - { - LOG_INFO(&Poco::Logger::get("Storage"), "Node not found"); - return; - } + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + else if (parent_node->stat.ephemeralOwner != 0) - { - LOG_INFO(&Poco::Logger::get("Storage"), "Ephemerals cannot have children"); - return; - } + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}}; std::string path_created = request.path; if (request.is_sequential) { auto seq_num = parent_node->seq_num; - std::stringstream seq_num_str; // 
STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM seq_num_str.exceptions(std::ios::failbit); seq_num_str << std::setw(10) << std::setfill('0') << seq_num; @@ -334,175 +526,81 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr } if (storage.current_nodes.hasNode(path_created)) - { - LOG_INFO(&Poco::Logger::get("Storage"), "Node exists"); - return; - } + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNODEEXISTS}}}; if (getBaseName(path_created).size == 0) - { - LOG_INFO(&Poco::Logger::get("Storage"), "Invalid path"); - return; - } + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADARGUMENTS}}}; - KeeperStorage::CurrentNodePtr created_node = std::make_shared(zxid, Coordination::Stat{}, request.data, 0); - created_node->stat.czxid = zxid; - created_node->stat.mzxid = zxid; - created_node->stat.pzxid = zxid; - created_node->stat.ctime = time; - created_node->stat.mtime = time; - created_node->stat.numChildren = 0; - created_node->stat.dataLength = request.data.length(); - created_node->stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; + Coordination::ACLs node_acls; + if (!fixupACL(request.acls, storage.session_and_auth[session_id], node_acls)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZINVALIDACL}}}; - storage.current_nodes.insertNode(path_created, created_node); + Coordination::Stat stat; + stat.czxid = zxid; + stat.mzxid = zxid; + stat.pzxid = zxid; + stat.ctime = time; + stat.mtime = time; + stat.numChildren = 0; + stat.dataLength = request.data.length(); + stat.ephemeralOwner = request.is_ephemeral ? 
session_id : 0; + + new_deltas.emplace_back( + path_created, + zxid, + KeeperStorage::CreateNodeDelta{stat, request.is_ephemeral, request.is_sequential, std::move(node_acls), request.data}); int32_t parent_cversion = request.parent_cversion; - /// Increment sequential number even if node is not sequential - ++parent_node->seq_num; + new_deltas.emplace_back( + std::string{parent_path}, + zxid, + KeeperStorage::UpdateNodeDelta{[parent_cversion, zxid](KeeperStorage::Node & node) + { + ++node.seq_num; + if (parent_cversion == -1) + ++node.stat.cversion; + else if (parent_cversion > node.stat.cversion) + node.stat.cversion = parent_cversion; - if (parent_cversion == -1) - ++parent_node->stat.cversion; - else if (parent_cversion > parent_node->stat.cversion) - parent_node->stat.cversion = parent_cversion; - - if (zxid > parent_node->stat.pzxid) - parent_node->stat.pzxid = zxid; - ++parent_node->stat.numChildren; - storage.current_nodes.insertNode(std::string{parent_path}, parent_node); + if (zxid > node.stat.pzxid) + node.stat.pzxid = zxid; + ++node.stat.numChildren; + }}); + return new_deltas; } - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { - auto & container = storage.container; - auto & ephemerals = storage.ephemerals; - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Undo undo; Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); - auto parent_path = parentPath(request.path); - auto it = container.find(parent_path); - - if (it == container.end()) + auto result = storage.commit(zxid, session_id); + if (result != Coordination::Error::ZOK) { - response.error = Coordination::Error::ZNONODE; - return { response_ptr, undo }; - } - else if 
(it->value.stat.ephemeralOwner != 0) - { - response.error = Coordination::Error::ZNOCHILDRENFOREPHEMERALS; - return { response_ptr, undo }; - } - std::string path_created = request.path; - if (request.is_sequential) - { - auto seq_num = it->value.seq_num; - - std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - seq_num_str.exceptions(std::ios::failbit); - seq_num_str << std::setw(10) << std::setfill('0') << seq_num; - - path_created += seq_num_str.str(); - } - if (container.contains(path_created)) - { - response.error = Coordination::Error::ZNODEEXISTS; - return { response_ptr, undo }; - } - if (getBaseName(path_created).size == 0) - { - response.error = Coordination::Error::ZBADARGUMENTS; - return { response_ptr, undo }; - } - - auto & session_auth_ids = storage.session_and_auth[session_id]; - - KeeperStorage::Node created_node; - - Coordination::ACLs node_acls; - if (!fixupACL(request.acls, session_auth_ids, node_acls)) - { - response.error = Coordination::Error::ZINVALIDACL; + response.error = result; return {response_ptr, {}}; } - uint64_t acl_id = storage.acl_map.convertACLs(node_acls); - storage.acl_map.addUsage(acl_id); + const auto & deltas = storage.current_nodes.deltas; + auto create_delta_it = std::find_if( + deltas.begin(), + deltas.end(), + [zxid](const auto & delta) + { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); - created_node.acl_id = acl_id; - created_node.stat.czxid = zxid; - created_node.stat.mzxid = zxid; - created_node.stat.pzxid = zxid; - created_node.stat.ctime = time; - created_node.stat.mtime = time; - created_node.stat.numChildren = 0; - created_node.stat.dataLength = request.data.length(); - created_node.stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; - created_node.is_sequental = request.is_sequential; - created_node.setData(std::move(request.data)); - - auto [map_key, _] = container.insert(path_created, created_node); - /// Take child path from key owned by map. 
- auto child_path = getBaseName(map_key->getKey()); - - int32_t parent_cversion = request.parent_cversion; - int64_t prev_parent_zxid; - int32_t prev_parent_cversion; - container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid, - parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent) - { - parent.addChild(child_path); - prev_parent_cversion = parent.stat.cversion; - prev_parent_zxid = parent.stat.pzxid; - - /// Increment sequential number even if node is not sequential - ++parent.seq_num; - - if (parent_cversion == -1) - ++parent.stat.cversion; - else if (parent_cversion > parent.stat.cversion) - parent.stat.cversion = parent_cversion; - - if (zxid > parent.stat.pzxid) - parent.stat.pzxid = zxid; - ++parent.stat.numChildren; - }); - - response.path_created = path_created; - - if (request.is_ephemeral) - ephemerals[session_id].emplace(path_created); - - undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id] - { - storage.acl_map.removeUsage(acl_id); - - if (is_ephemeral) - storage.ephemerals[session_id].erase(path_created); - - storage.container.updateValue(parent_path, [child_path, prev_parent_zxid, prev_parent_cversion] (KeeperStorage::Node & undo_parent) - { - --undo_parent.stat.numChildren; - --undo_parent.seq_num; - undo_parent.stat.cversion = prev_parent_cversion; - undo_parent.stat.pzxid = prev_parent_zxid; - undo_parent.removeChild(child_path); - }); - - storage.container.erase(path_created); - }; + if (create_delta_it == deltas.end()) + std::terminate(); + response.path_created = create_delta_it->path; response.error = Coordination::Error::ZOK; - return { response_ptr, undo }; + return response_ptr; } }; struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; @@ -519,7 +617,20 @@ 
struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override + + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); + + if (!storage.current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + return {}; + } + + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); @@ -538,28 +649,10 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce response.error = Coordination::Error::ZOK; } - return { response_ptr, {} }; + return response_ptr; } }; -namespace -{ - /// Garbage required to apply log to "fuzzy" zookeeper snapshot - void updateParentPzxid(const std::string & child_path, int64_t zxid, KeeperStorage::Container & container) - { - auto parent_path = parentPath(child_path); - auto parent_it = container.find(parent_path); - if (parent_it != container.end()) - { - container.updateValue(parent_path, [zxid](KeeperStorage::Node & parent) - { - if (parent.stat.pzxid < zxid) - parent.stat.pzxid = zxid; - }); - } - } -} - struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override @@ -578,81 +671,77 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t 
/*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override { - auto & container = storage.container; - auto & ephemerals = storage.ephemerals; - - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); - Undo undo; - auto it = container.find(request.path); - if (it == container.end()) + std::vector new_deltas; + + const auto update_parent_pzxid = [&]() + { + auto parent_path = parentPath(request.path); + if (!storage.current_nodes.hasNode(parent_path)) + return; + + new_deltas.emplace_back( + std::string{parent_path}, + zxid, + KeeperStorage::UpdateNodeDelta{[zxid](KeeperStorage::Node & parent) + { + if (parent.stat.pzxid < zxid) + parent.stat.pzxid = zxid; + }}); + }; + + auto node = storage.current_nodes.getNode(request.path); + + if (!node) { if (request.restored_from_zookeeper_log) - updateParentPzxid(request.path, zxid, container); - response.error = Coordination::Error::ZNONODE; + update_parent_pzxid(); + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; } - else if (request.version != -1 && request.version != it->value.stat.version) - { - response.error = Coordination::Error::ZBADVERSION; - } - else if (it->value.stat.numChildren) - { - response.error = Coordination::Error::ZNOTEMPTY; - } - else - { - if (request.restored_from_zookeeper_log) - updateParentPzxid(request.path, zxid, container); + else if (request.version != -1 && request.version != node->stat.version) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + else if (node->stat.numChildren) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNOTEMPTY}}}; - auto prev_node = it->value; - if (prev_node.stat.ephemeralOwner != 0) - { - auto 
ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); - ephemerals_it->second.erase(request.path); - if (ephemerals_it->second.empty()) - ephemerals.erase(ephemerals_it); - } + if (request.restored_from_zookeeper_log) + update_parent_pzxid(); - storage.acl_map.removeUsage(prev_node.acl_id); + new_deltas.emplace_back( + std::string{parentPath(request.path)}, + zxid, + KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + }}); - container.updateValue(parentPath(request.path), [child_basename = getBaseName(it->key)] (KeeperStorage::Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - parent.removeChild(child_basename); - }); + new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version}); - response.error = Coordination::Error::ZOK; - /// Erase full path from container after child removed from parent - container.erase(request.path); - - undo = [prev_node, &storage, path = request.path] - { - if (prev_node.stat.ephemeralOwner != 0) - storage.ephemerals[prev_node.stat.ephemeralOwner].emplace(path); - - storage.acl_map.addUsage(prev_node.acl_id); - - /// Dangerous place: we are adding StringRef to child into children unordered_hash set. - /// That's why we are taking getBaseName from inserted key, not from the path from request object. 
- auto [map_key, _] = storage.container.insert(path, prev_node); - storage.container.updateValue(parentPath(path), [child_name = getBaseName(map_key->getKey())] (KeeperStorage::Node & parent) - { - ++parent.stat.numChildren; - --parent.stat.cversion; - parent.addChild(child_name); - }); - }; - } - - return { response_ptr, undo }; + return new_deltas; } - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Undo undo; + Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); + + auto result = storage.commit(zxid, session_id); + if (result != Coordination::Error::ZOK) + { + response.error = result; + return {response_ptr, {}}; + } + + return response_ptr; + } + + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); } @@ -661,7 +750,20 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */, int64_t /* time */) const override + + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + + if (!storage.current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + return {}; + } + + 
Coordination::ZooKeeperResponsePtr + process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */, int64_t /* time */) const override { auto & container = storage.container; @@ -680,7 +782,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr response.error = Coordination::Error::ZNONODE; } - return { response_ptr, {} }; + return response_ptr; } }; @@ -702,7 +804,42 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /* session_id */, int64_t time) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time) const override + { + Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + + std::vector new_deltas; + + if (!storage.current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + auto node = storage.current_nodes.getNode(request.path); + + if (request.version != -1 && request.version != node->stat.version) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + + new_deltas.emplace_back( + request.path, + zxid, + KeeperStorage::UpdateNodeDelta{ + [zxid, data = request.data, time](KeeperStorage::Node & value) mutable + { + value.stat.version++; + value.stat.mzxid = zxid; + value.stat.mtime = time; + value.stat.dataLength = data.length(); + value.setData(std::move(data)); + }, + request.version}); + + new_deltas.emplace_back(parentPath(request.path).toString(), zxid, KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) { + parent.stat.cversion++; + }}); + + return new_deltas; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { auto & container = storage.container; @@ -711,51 +848,25 @@ struct 
KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); Undo undo; + auto result = storage.commit(zxid, session_id); + if (result != Coordination::Error::ZOK) + { + response.error = result; + return {response_ptr, {}}; + } + auto it = container.find(request.path); if (it == container.end()) - { - response.error = Coordination::Error::ZNONODE; - } - else if (request.version == -1 || request.version == it->value.stat.version) - { + fail(); - auto prev_node = it->value; + response.stat = it->value.stat; + response.error = Coordination::Error::ZOK; - auto itr = container.updateValue(request.path, [zxid, request, time] (KeeperStorage::Node & value) mutable - { - value.stat.version++; - value.stat.mzxid = zxid; - value.stat.mtime = time; - value.stat.dataLength = request.data.length(); - value.setData(std::move(request.data)); - }); - - container.updateValue(parentPath(request.path), [] (KeeperStorage::Node & parent) - { - parent.stat.cversion++; - }); - - response.stat = itr->value.stat; - response.error = Coordination::Error::ZOK; - - undo = [prev_node, &container, path = request.path] - { - container.updateValue(path, [&prev_node] (KeeperStorage::Node & value) { value = prev_node; }); - container.updateValue(parentPath(path), [] (KeeperStorage::Node & parent) - { - parent.stat.cversion--; - }); - }; - } - else - { - response.error = Coordination::Error::ZBADVERSION; - } - - return { response_ptr, undo }; + return response_ptr; } - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } @@ -779,7 +890,19 @@ struct KeeperStorageListRequestProcessor final 
: public KeeperStorageRequestProc } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + + if (!storage.current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + return {}; + } + + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); @@ -807,7 +930,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc response.error = Coordination::Error::ZOK; } - return { response_ptr, {} }; + return response_ptr; } }; @@ -829,7 +952,23 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + + if (!storage.current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + auto node = storage.current_nodes.getNode(request.path); + if (request.version != -1 && request.version != node->stat.version) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + + return {}; + } + + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & storage, int64_t 
/*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { auto & container = storage.container; @@ -850,7 +989,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro response.error = Coordination::Error::ZOK; } - return { response_ptr, {} }; + return response_ptr; } }; @@ -874,48 +1013,51 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { - auto & container = storage.container; + Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + auto & current_nodes = storage.current_nodes; + if (!current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + auto node = current_nodes.getNode(request.path); + + if (request.version != -1 && request.version != node->stat.aversion) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + + + auto & session_auth_ids = storage.session_and_auth[session_id]; + Coordination::ACLs node_acls; + + if (!fixupACL(request.acls, session_auth_ids, node_acls)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZINVALIDACL}}}; + + return { + {request.path, zxid, KeeperStorage::SetACLDelta{std::move(node_acls), request.version}}, + {request.path, zxid, KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & n) { ++n.stat.aversion; }}}}; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override + { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperSetACLResponse & response = dynamic_cast(*response_ptr); 
Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + + auto result = storage.commit(zxid, session_id); + if (result != Coordination::Error::ZOK) { - response.error = Coordination::Error::ZNONODE; - } - else if (request.version != -1 && request.version != it->value.stat.aversion) - { - response.error = Coordination::Error::ZBADVERSION; - } - else - { - auto & session_auth_ids = storage.session_and_auth[session_id]; - Coordination::ACLs node_acls; - - if (!fixupACL(request.acls, session_auth_ids, node_acls)) - { - response.error = Coordination::Error::ZINVALIDACL; - return {response_ptr, {}}; - } - - uint64_t acl_id = storage.acl_map.convertACLs(node_acls); - storage.acl_map.addUsage(acl_id); - - storage.container.updateValue(request.path, [acl_id] (KeeperStorage::Node & node) - { - node.acl_id = acl_id; - ++node.stat.aversion; - }); - - response.stat = it->value.stat; - response.error = Coordination::Error::ZOK; + response.error = result; + return {response_ptr, {}}; } - /// It cannot be used insied multitransaction? 
- return { response_ptr, {} }; + auto it = storage.container.find(request.path); + if (it == storage.container.end()) + fail(); + response.stat = it->value.stat; + response.error = Coordination::Error::ZOK; + + return response_ptr; } }; @@ -938,7 +1080,19 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); + + if (!storage.current_nodes.hasNode(request.path)) + return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + + return {}; + } + + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); @@ -955,7 +1109,7 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr response.acl = storage.acl_map.convertNumber(it->value.acl_id); } - return {response_ptr, {}}; + return response_ptr; } }; @@ -994,23 +1148,30 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro concrete_requests.push_back(std::make_shared(sub_zk_request)); break; default: - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); + throw DB::Exception( + ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); } } } - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + 
std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + for (const auto & concrete_request : concrete_requests) + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); - std::vector undo_actions; + Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*zk_request); try { size_t i = 0; for (const auto & concrete_request : concrete_requests) { - auto [ cur_response, undo_action ] = concrete_request->process(storage, zxid, session_id, time); + auto [cur_response, undo_action] = concrete_request->process(storage, zxid, session_id, time); response.responses[i] = cur_response; if (cur_response->error != Coordination::Error::ZOK) @@ -1032,7 +1193,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro if (*it) (*it)(); - return { response_ptr, {} }; + return {response_ptr, {}}; } else undo_actions.emplace_back(std::move(undo_action)); @@ -1041,18 +1202,12 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro } response.error = Coordination::Error::ZOK; - return { response_ptr, {} }; - } - catch (...) 
- { - for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it) - if (*it) - (*it)(); - throw; + return response_ptr; } } - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { KeeperStorage::ResponsesForSessions result; for (const auto & generic_request : concrete_requests) @@ -1067,7 +1222,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage &, int64_t, int64_t, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr process(KeeperStorage &, int64_t, int64_t, int64_t /* time */) const override { throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR); } @@ -1076,11 +1231,12 @@ struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override { Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); + Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); auto & sessions_and_auth = storage.session_and_auth; if 
(auth_request.scheme != "digest" || std::count(auth_request.data.begin(), auth_request.data.end(), ':') != 1) @@ -1102,10 +1258,9 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc if (std::find(session_ids.begin(), session_ids.end(), auth) == session_ids.end()) session_ids.emplace_back(auth); } - } - return { response_ptr, {} }; + return {response_ptr, {}}; } }; @@ -1131,7 +1286,6 @@ void KeeperStorage::finalize() class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable { - public: using Creator = std::function; using OpNumToRequest = std::unordered_map; @@ -1162,10 +1316,11 @@ private: KeeperStorageRequestProcessorsFactory(); }; -template +template void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory) { - factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); + factory.registerRequest( + num, [](const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); } @@ -1189,10 +1344,17 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() } -std::optional KeeperStorage::preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid) +std::optional +KeeperStorage::preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid) { + if (new_last_zxid <= current_nodes.current_zxid) + return std::nullopt; + + current_nodes.current_zxid = new_last_zxid; KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(request); - request_processor->preprocess(*this, session_id, time, new_last_zxid); + auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); + current_nodes.deltas.insert( + current_nodes.deltas.end(), std::make_move_iterator(new_deltas.begin()), 
std::make_move_iterator(new_deltas.end())); switch (request->getOpNum()) { using enum Coordination::OpNum; @@ -1203,13 +1365,19 @@ std::optional KeeperStorage::preprocessRequest(const Coordination::ZooK } } -KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl) +KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( + const Coordination::ZooKeeperRequestPtr & zk_request, + int64_t session_id, + int64_t time, + std::optional new_last_zxid, + bool check_acl) { KeeperStorage::ResponsesForSessions results; if (new_last_zxid) { if (zxid >= *new_last_zxid) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid); + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid); zxid = *new_last_zxid; } @@ -1223,13 +1391,15 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina { for (const auto & ephemeral_path : it->second) { - container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (KeeperStorage::Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - auto base_name = getBaseName(ephemeral_path); - parent.removeChild(base_name); - }); + container.updateValue( + parentPath(ephemeral_path), + [&ephemeral_path](KeeperStorage::Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + auto base_name = getBaseName(ephemeral_path); + parent.removeChild(base_name); + }); container.erase(ephemeral_path); @@ -1274,6 +1444,14 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina else { std::tie(response, std::ignore) = request_processor->process(*this, zxid, session_id, time); + + auto & deltas = current_nodes.deltas; + while (!deltas.empty()) + { + if (deltas.front().zxid > 
zxid) + break; + deltas.pop_front(); + } } /// Watches for this requests are added to the watches lists @@ -1281,7 +1459,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina { if (response->error == Coordination::Error::ZOK) { - auto & watches_type = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList + auto & watches_type + = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList ? list_watches : watches; diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 23fdd0fbb0f..e00c5a25e8b 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -112,37 +112,86 @@ public: /// container. Container container; - struct CurrentNode + struct CreateNodeDelta { - CurrentNode(int64_t zxid_, Coordination::Stat stat_, String data_, int32_t seq_num_) - : zxid(zxid_) - , stat(stat_) - , seq_num(seq_num_) - , data(std::move(data_)) - {} - - int64_t zxid; - Coordination::Stat stat{}; - int32_t seq_num{0}; + Coordination::Stat stat; + bool is_ephemeral; + bool is_sequental; + Coordination::ACLs acls; String data; }; - using CurrentNodePtr = std::shared_ptr; + struct RemoveNodeDelta + { + int32_t version{-1}; + }; + + struct UpdateNodeDelta + { + std::function update_fn; + int32_t version{-1}; + }; + + struct SetACLDelta + { + Coordination::ACLs acls; + int32_t version{-1}; + }; + + struct ErrorDelta + { + Coordination::Error error; + }; + + using Operation = std::variant; + + struct Delta + { + Delta(String path_, int64_t zxid_, Operation operation_) + : path(std::move(path_)) + , zxid(zxid_) + , operation(std::move(operation_)) + {} + + Delta(int64_t zxid_, ErrorDelta error) + : Delta("", zxid_, error) + {} + + String path; + int64_t zxid; + Operation operation; + }; struct CurrentNodes { explicit CurrentNodes(KeeperStorage & storage_) : storage(storage_) {} - 
CurrentNodePtr getNode(const std::string & path); - bool hasNode(const std::string & path) const; - void insertNode(const std::string & path, const CurrentNodePtr & new_node); + template + void applyDeltas(StringRef path, const Visitor & visitor) const + { + for (const auto & delta : deltas) + { + if (delta.path == path) + std::visit(visitor, delta.operation); + } + } - std::unordered_map> updated_nodes; + std::shared_ptr getNode(StringRef path); + bool hasNode(StringRef path) const; + + std::unordered_map> node_to_deltas; + std::deque deltas; KeeperStorage & storage; + int64_t current_zxid{0}; }; CurrentNodes current_nodes{*this}; + Coordination::Error commit(int64_t zxid, int64_t session_id); + + bool createNode(const std::string & path, String data, const Coordination::Stat & stat, bool is_sequental, bool is_ephemeral, Coordination::ACLs node_acls, int64_t session_id); + bool removeNode(const std::string & path, int32_t version); + /// Mapping session_id -> set of ephemeral nodes paths Ephemerals ephemerals; /// Mapping session_id -> set of watched nodes paths From 7f6fa9fe83b02c7b490fd73c2d5bc588a5b788ba Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 07:02:11 +0000 Subject: [PATCH 065/615] Fix splitting impl of basic operations --- src/Coordination/KeeperServer.cpp | 23 +-- src/Coordination/KeeperStateMachine.cpp | 11 +- src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStorage.cpp | 192 +++++++++++++----------- src/Coordination/KeeperStorage.h | 25 ++- 5 files changed, 128 insertions(+), 125 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 0e54ba02139..22a3a4624c6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -505,28 +505,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ } if (initialized_flag) - { - switch (type) - { - case nuraft::cb_func::PreAppendLog: - { - auto & entry = 
*static_cast(param->ctx); - auto log_store = state_manager->load_log_store(); - auto next_idx = log_store->next_slot(); - auto maybe_digest = state_machine->preprocess(next_idx, entry->get_buf()); - if (maybe_digest) - { - auto & buff = entry->get_buf(); - DB::WriteBuffer buf(reinterpret_cast(buff.data_begin() + buff.size() - sizeof(int64_t)), buff.size()); - DB::writeIntBinary(*maybe_digest, buf); - } - - return nuraft::cb_func::ReturnCode::Ok; - } - default: - return nuraft::cb_func::ReturnCode::Ok; - } - } + return nuraft::cb_func::ReturnCode::Ok; size_t last_commited = state_machine->last_commit_index(); size_t next_index = state_manager->getLogStore()->next_slot(); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 00890aa9fd1..0aa6b6d7ca0 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -44,10 +44,6 @@ namespace else /// backward compatibility request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); - int64_t digest; - readIntBinary(digest, buffer); - LOG_INFO(&Poco::Logger::get("STORAGEEE"), "Read digest {}", digest); - return request_for_session; } } @@ -123,14 +119,13 @@ nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nur return nullptr; } -std::optional KeeperStateMachine::preprocess(const uint64_t log_idx, nuraft::buffer & data) +void KeeperStateMachine::preprocess(const uint64_t log_idx, nuraft::buffer & data) { - LOG_INFO(&Poco::Logger::get("Storageeee"), "Preprocess called"); auto request_for_session = parseRequest(data); if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) - return std::nullopt; + return; std::lock_guard lock(storage_and_responses_lock); - return storage->preprocessRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, log_idx); + storage->preprocessRequest(request_for_session.request, 
request_for_session.session_id, request_for_session.time, log_idx); } nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index ae9cc0fe1a0..a9fdfd7fac2 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -27,7 +27,7 @@ public: /// Read state from the latest snapshot void init(); - std::optional preprocess(uint64_t log_idx, nuraft::buffer & data); + void preprocess(uint64_t log_idx, nuraft::buffer & data); /// Currently not supported nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 8add31629e6..3e30c1be26f 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -285,6 +285,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i if (delta.zxid > commit_zxid) break; + bool finish_subdelta = false; auto result = std::visit( Overloaded{ [&, &path = delta.path](KeeperStorage::CreateNodeDelta & create_delta) @@ -338,11 +339,24 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i return Coordination::Error::ZOK; }, - [&](KeeperStorage::ErrorDelta & error_delta) { return error_delta.error; }}, + [&](KeeperStorage::ErrorDelta & error_delta) { return error_delta.error; }, + [&](KeeperStorage::SubDeltaEnd &) + { + finish_subdelta = true; + return Coordination::Error::ZOK; + }, + [&](KeeperStorage::FailedMultiDelta &) -> Coordination::Error + { + // this shouldn't be called in any process functions + fail(); + }}, delta.operation); if (result != Coordination::Error::ZOK) return result; + + if (finish_subdelta) + return Coordination::Error::ZOK; } return Coordination::Error::ZOK; @@ -421,8 +435,6 @@ bool KeeperStorage::removeNode(const std::string & path, int32_t version) } -using Undo = 
std::function; - struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; @@ -463,7 +475,7 @@ struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProc auto response = zk_request->makeResponse(); dynamic_cast(*response).path = dynamic_cast(*zk_request).path; - return {response, {}}; + return response; } }; @@ -508,10 +520,10 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr auto parent_path = parentPath(request.path); auto parent_node = storage.current_nodes.getNode(std::string{parent_path}); if (parent_node == nullptr) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; else if (parent_node->stat.ephemeralOwner != 0) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}}; + return {{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; std::string path_created = request.path; if (request.is_sequential) @@ -526,14 +538,14 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr } if (storage.current_nodes.hasNode(path_created)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNODEEXISTS}}}; + return {{zxid, Coordination::Error::ZNODEEXISTS}}; if (getBaseName(path_created).size == 0) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADARGUMENTS}}}; + return {{zxid, Coordination::Error::ZBADARGUMENTS}}; Coordination::ACLs node_acls; if (!fixupACL(request.acls, storage.session_and_auth[session_id], node_acls)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZINVALIDACL}}}; + return {{zxid, Coordination::Error::ZINVALIDACL}}; Coordination::Stat stat; stat.czxid = zxid; @@ -573,14 +585,13 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override 
{ Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Undo undo; Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); auto result = storage.commit(zxid, session_id); if (result != Coordination::Error::ZOK) { response.error = result; - return {response_ptr, {}}; + return response_ptr; } const auto & deltas = storage.current_nodes.deltas; @@ -624,7 +635,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); if (!storage.current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; return {}; } @@ -700,12 +711,12 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { if (request.restored_from_zookeeper_log) update_parent_pzxid(); - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; } else if (request.version != -1 && request.version != node->stat.version) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + return {{zxid, Coordination::Error::ZBADVERSION}}; else if (node->stat.numChildren) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNOTEMPTY}}}; + return {{zxid, Coordination::Error::ZNOTEMPTY}}; if (request.restored_from_zookeeper_log) update_parent_pzxid(); @@ -727,14 +738,13 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Undo undo; Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); auto result = storage.commit(zxid, session_id); if (result != Coordination::Error::ZOK) { 
response.error = result; - return {response_ptr, {}}; + return response_ptr; } return response_ptr; @@ -757,7 +767,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); if (!storage.current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; return {}; } @@ -811,12 +821,12 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce std::vector new_deltas; if (!storage.current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; auto node = storage.current_nodes.getNode(request.path); if (request.version != -1 && request.version != node->stat.version) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + return {{zxid, Coordination::Error::ZBADVERSION}}; new_deltas.emplace_back( request.path, @@ -846,13 +856,12 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperSetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); - Undo undo; auto result = storage.commit(zxid, session_id); if (result != Coordination::Error::ZOK) { response.error = result; - return {response_ptr, {}}; + return response_ptr; } auto it = container.find(request.path); @@ -896,7 +905,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); if (!storage.current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; return {}; } @@ -958,11 +967,11 
@@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); if (!storage.current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; auto node = storage.current_nodes.getNode(request.path); if (request.version != -1 && request.version != node->stat.version) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + return {{zxid, Coordination::Error::ZBADVERSION}}; return {}; } @@ -1019,19 +1028,19 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr auto & current_nodes = storage.current_nodes; if (!current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; auto node = current_nodes.getNode(request.path); if (request.version != -1 && request.version != node->stat.aversion) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZBADVERSION}}}; + return {{zxid, Coordination::Error::ZBADVERSION}}; auto & session_auth_ids = storage.session_and_auth[session_id]; Coordination::ACLs node_acls; if (!fixupACL(request.acls, session_auth_ids, node_acls)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZINVALIDACL}}}; + return {{zxid, Coordination::Error::ZINVALIDACL}}; return { {request.path, zxid, KeeperStorage::SetACLDelta{std::move(node_acls), request.version}}, @@ -1048,7 +1057,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr if (result != Coordination::Error::ZOK) { response.error = result; - return {response_ptr, {}}; + return response_ptr; } auto it = storage.container.find(request.path); @@ -1086,7 +1095,7 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperGetACLRequest & request = 
dynamic_cast(*zk_request); if (!storage.current_nodes.hasNode(request.path)) - return {{zxid, KeeperStorage::ErrorDelta{Coordination::Error::ZNONODE}}}; + return {{zxid, Coordination::Error::ZNONODE}}; return {}; } @@ -1154,56 +1163,77 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro } } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { - for (const auto & concrete_request : concrete_requests) + auto & saved_deltas = storage.current_nodes.deltas; + + std::vector response_errors; + response_errors.reserve(concrete_requests.size()); + for (size_t i = 0; i < concrete_requests.size(); ++i) + { + auto new_deltas = concrete_requests[i]->preprocess(storage, zxid, session_id, time); + if (auto * error = std::get_if(&new_deltas.back().operation)) + { + std::erase_if(saved_deltas, [zxid](const auto & delta) { return delta.zxid == zxid; }); + + response_errors.push_back(error->error); + + for (size_t j = i + 1; j < concrete_requests.size(); ++j) + { + response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); + } + + return {{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + } + new_deltas.emplace_back(zxid, KeeperStorage::SubDeltaEnd{}); + response_errors.push_back(Coordination::Error::ZOK); + + saved_deltas.insert(saved_deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); + } + + return {}; } Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*zk_request); - try + auto & deltas 
= storage.current_nodes.deltas; + if (auto * failed_multi = std::get_if(&deltas.front().operation)) { - size_t i = 0; - for (const auto & concrete_request : concrete_requests) + for (size_t i = 0; i < concrete_requests.size(); ++i) { - auto [cur_response, undo_action] = concrete_request->process(storage, zxid, session_id, time); - - response.responses[i] = cur_response; - if (cur_response->error != Coordination::Error::ZOK) - { - for (size_t j = 0; j <= i; ++j) - { - auto response_error = response.responses[j]->error; - response.responses[j] = std::make_shared(); - response.responses[j]->error = response_error; - } - - for (size_t j = i + 1; j < response.responses.size(); ++j) - { - response.responses[j] = std::make_shared(); - response.responses[j]->error = Coordination::Error::ZRUNTIMEINCONSISTENCY; - } - - for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it) - if (*it) - (*it)(); - - return {response_ptr, {}}; - } - else - undo_actions.emplace_back(std::move(undo_action)); - - ++i; + response.responses[i] = std::make_shared(); + response.responses[i]->error = failed_multi->error_codes[i]; } - response.error = Coordination::Error::ZOK; return response_ptr; } + + size_t i = 0; + for (const auto & concrete_request : concrete_requests) + { + auto cur_response = concrete_request->process(storage, zxid, session_id, time); + + while (!deltas.empty()) + { + if (std::holds_alternative(deltas.front().operation)) + { + deltas.pop_front(); + break; + } + + deltas.pop_front(); + } + + response.responses[i] = cur_response; + + ++i; + } + + response.error = Coordination::Error::ZOK; + return response_ptr; } KeeperStorage::ResponsesForSessions @@ -1260,7 +1290,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc } } - return {response_ptr, {}}; + return response_ptr; } }; @@ -1344,25 +1374,14 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() } -std::optional 
-KeeperStorage::preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid) +void KeeperStorage::preprocessRequest( + const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid) { - if (new_last_zxid <= current_nodes.current_zxid) - return std::nullopt; - current_nodes.current_zxid = new_last_zxid; KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(request); auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); current_nodes.deltas.insert( current_nodes.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); - switch (request->getOpNum()) - { - using enum Coordination::OpNum; - case Create: - return new_last_zxid; - default: - return std::nullopt; - } } KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( @@ -1424,7 +1443,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); - auto [response, _] = storage_request->process(*this, zxid, session_id, time); + auto response = storage_request->process(*this, zxid, session_id, time); response->xid = zk_request->xid; response->zxid = getZXID(); @@ -1443,15 +1462,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } else { - std::tie(response, std::ignore) = request_processor->process(*this, zxid, session_id, time); - - auto & deltas = current_nodes.deltas; - while (!deltas.empty()) - { - if (deltas.front().zxid > zxid) - break; - deltas.pop_front(); - } + response = request_processor->process(*this, zxid, session_id, time); + std::erase_if(current_nodes.deltas, [this](const auto & delta) { return 
delta.zxid == zxid; }); } /// Watches for this requests are added to the watches lists diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index e00c5a25e8b..007c0e60b1d 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -143,7 +143,16 @@ public: Coordination::Error error; }; - using Operation = std::variant; + struct FailedMultiDelta + { + std::vector error_codes; + }; + + struct SubDeltaEnd + { + }; + + using Operation = std::variant; struct Delta { @@ -153,8 +162,16 @@ public: , operation(std::move(operation_)) {} - Delta(int64_t zxid_, ErrorDelta error) - : Delta("", zxid_, error) + Delta(int64_t zxid_, Coordination::Error error) + : Delta("", zxid_, ErrorDelta{error}) + {} + + Delta(int64_t zxid_, SubDeltaEnd subdelta) + : Delta("", zxid_, subdelta) + {} + + Delta(int64_t zxid_, FailedMultiDelta failed_multi) + : Delta("", zxid_, failed_multi) {} String path; @@ -244,7 +261,7 @@ public: /// Process user request and return response. /// check_acl = false only when converting data from ZooKeeper. 
ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl = true); - std::optional preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid); + void preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid); void finalize(); From ad7226e1515fe7b6e0734325345fa2df40001ad3 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 08:32:25 +0000 Subject: [PATCH 066/615] Add processLocal for read requests --- src/Coordination/KeeperStateMachine.cpp | 2 +- src/Coordination/KeeperStorage.cpp | 354 ++++++++++++++++++------ src/Coordination/KeeperStorage.h | 140 ++++------ 3 files changed, 312 insertions(+), 184 deletions(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 0aa6b6d7ca0..9b2c8947d95 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -357,7 +357,7 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi { /// Pure local request, just process it with storage std::lock_guard lock(storage_and_responses_lock); - auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, std::nullopt); + auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) if (!responses_queue.push(response)) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push response with session id {} into responses queue", response.session_id); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 3e30c1be26f..2f7f4357f8c 100644 --- 
a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -446,6 +446,14 @@ struct KeeperStorageRequestProcessor { return {}; } + + // process the request using locally committed data + virtual Coordination::ZooKeeperResponsePtr + processLocal(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const + { + throw Exception{DB::ErrorCodes::LOGICAL_ERROR, "Cannot process the request locally"}; + } + virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { @@ -499,11 +507,11 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr auto path = zk_request->getPath(); auto parent_path = parentPath(path); - auto it = container.find(parent_path); - if (it == container.end()) + auto node_it = container.find(parent_path); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -587,8 +595,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - auto result = storage.commit(zxid, session_id); - if (result != Coordination::Error::ZOK) + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { response.error = result; return response_ptr; @@ -615,11 +622,11 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) + auto node_it = container.find(zk_request->getPath()); + if 
(node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -640,28 +647,56 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce return {}; } - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { - auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + if constexpr (local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + const auto on_error = [&]([[maybe_unused]] const auto error_code) + { + if constexpr (local) + response.error = error_code; + else + fail(); + }; + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + on_error(Coordination::Error::ZNONODE); } else { - response.stat = it->value.stat; - response.data = it->value.getData(); + response.stat = node_it->value.stat; + response.data = node_it->value.getData(); response.error = Coordination::Error::ZOK; } return response_ptr; } + + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + 
Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } }; struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor @@ -669,11 +704,11 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto it = container.find(parentPath(zk_request->getPath())); - if (it == container.end()) + auto node_it = container.find(parentPath(zk_request->getPath())); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -740,13 +775,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); - auto result = storage.commit(zxid, session_id); - if (result != Coordination::Error::ZOK) - { - response.error = result; - return response_ptr; - } - + response.error = storage.commit(zxid, session_id); return response_ptr; } @@ -772,28 +801,54 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr return {}; } - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */, int64_t /* time */) const override + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { - auto & container = storage.container; - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperExistsResponse & response = 
dynamic_cast(*response_ptr); Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it != container.end()) + if constexpr (local) { - response.stat = it->value.stat; - response.error = Coordination::Error::ZOK; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + const auto on_error = [&]([[maybe_unused]] const auto error_code) + { + if constexpr (local) + response.error = error_code; + else + fail(); + }; + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + on_error(Coordination::Error::ZNONODE); } else { - response.error = Coordination::Error::ZNONODE; + response.stat = node_it->value.stat; + response.error = Coordination::Error::ZOK; } return response_ptr; } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } }; struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor @@ -801,11 +856,11 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) + auto node_it = container.find(zk_request->getPath()); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) 
return true; @@ -857,18 +912,17 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce Coordination::ZooKeeperSetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); - auto result = storage.commit(zxid, session_id); - if (result != Coordination::Error::ZOK) + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { response.error = result; return response_ptr; } - auto it = container.find(request.path); - if (it == container.end()) + auto node_it = container.find(request.path); + if (node_it == container.end()) fail(); - response.stat = it->value.stat; + response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; return response_ptr; @@ -886,11 +940,11 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) + auto node_it = container.find(zk_request->getPath()); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -910,18 +964,36 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc return {}; } - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { - auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperListResponse & response = 
dynamic_cast(*response_ptr); Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + if constexpr (local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + const auto on_error = [&]([[maybe_unused]] const auto error_code) + { + if constexpr (local) + response.error = error_code; + else + fail(); + }; + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + on_error(Coordination::Error::ZNONODE); } else { @@ -929,18 +1001,28 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (path_prefix.empty()) throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR); - const auto & children = it->value.getChildren(); + const auto & children = node_it->value.getChildren(); response.names.reserve(children.size()); for (const auto child : children) response.names.push_back(child.toString()); - response.stat = it->value.stat; + response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; } return response_ptr; } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } }; struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor @@ -948,11 +1030,11 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & 
container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) + auto node_it = container.find(zk_request->getPath()); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -976,22 +1058,39 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro return {}; } - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { - auto & container = storage.container; - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperCheckResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + + if constexpr (local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } } - else if (request.version != -1 && request.version != it->value.stat.version) + + const auto on_error = [&]([[maybe_unused]] const auto error_code) { - response.error = Coordination::Error::ZBADVERSION; + if constexpr (local) + response.error = error_code; + else + fail(); + }; + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + on_error(Coordination::Error::ZNONODE); + } + else if (request.version != -1 && request.version != node_it->value.stat.version) + { + on_error(Coordination::Error::ZBADVERSION); } else { @@ 
-1000,6 +1099,16 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } }; @@ -1008,11 +1117,11 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) + auto node_it = container.find(zk_request->getPath()); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -1053,17 +1162,16 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperSetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); - auto result = storage.commit(zxid, session_id); - if (result != Coordination::Error::ZOK) + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { response.error = result; return response_ptr; } - auto it = storage.container.find(request.path); - if (it == storage.container.end()) + auto node_it = storage.container.find(request.path); + if (node_it == storage.container.end()) fail(); - response.stat = it->value.stat; + response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; return response_ptr; @@ -1075,11 +1183,11 @@ struct 
KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) + auto node_it = container.find(zk_request->getPath()); + if (node_it == container.end()) return true; - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); if (node_acls.empty()) return true; @@ -1100,26 +1208,54 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr return {}; } - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); - auto & container = storage.container; - auto it = container.find(request.path); - if (it == container.end()) + + if constexpr (local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + const auto on_error = [&]([[maybe_unused]] const auto error_code) + { + if constexpr (local) + response.error = error_code; + else + fail(); + }; + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + on_error(Coordination::Error::ZNONODE); } else { - response.stat = it->value.stat; - response.acl = storage.acl_map.convertNumber(it->value.acl_id); + 
response.stat = node_it->value.stat; + response.acl = storage.acl_map.convertNumber(node_it->value.acl_id); } return response_ptr; } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } }; struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor @@ -1165,6 +1301,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { + // manually add deltas so that the result of previous request in the transaction is used in the next request auto & saved_deltas = storage.current_nodes.deltas; std::vector response_errors; @@ -1211,10 +1348,9 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - size_t i = 0; - for (const auto & concrete_request : concrete_requests) + for (size_t i = 0; i < concrete_requests.size(); ++i) { - auto cur_response = concrete_request->process(storage, zxid, session_id, time); + auto cur_response = concrete_requests[i]->process(storage, zxid, session_id, time); while (!deltas.empty()) { @@ -1228,8 +1364,39 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro } response.responses[i] = cur_response; + } - ++i; + response.error = Coordination::Error::ZOK; + return response_ptr; + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperMultiResponse & response = 
dynamic_cast(*response_ptr); + + for (size_t i = 0; i < concrete_requests.size(); ++i) + { + auto cur_response = concrete_requests[i]->process(storage, zxid, session_id, time); + + response.responses[i] = cur_response; + if (cur_response->error != Coordination::Error::ZOK) + { + for (size_t j = 0; j <= i; ++j) + { + auto response_error = response.responses[j]->error; + response.responses[j] = std::make_shared(); + response.responses[j]->error = response_error; + } + + for (size_t j = i + 1; j < response.responses.size(); ++j) + { + response.responses[j] = std::make_shared(); + response.responses[j]->error = Coordination::Error::ZRUNTIMEINCONSISTENCY; + } + + return response_ptr; + } } response.error = Coordination::Error::ZOK; @@ -1389,7 +1556,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( int64_t session_id, int64_t time, std::optional new_last_zxid, - bool check_acl) + bool check_acl, + bool is_local) { KeeperStorage::ResponsesForSessions results; if (new_last_zxid) @@ -1462,7 +1630,11 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } else { - response = request_processor->process(*this, zxid, session_id, time); + if (is_local) + response = request_processor->processLocal(*this, zxid, session_id, time); + else + response = request_processor->process(*this, zxid, session_id, time); + std::erase_if(current_nodes.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 007c0e60b1d..e25539c7bad 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -1,14 +1,14 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #include @@ -29,7 +29,6 @@ struct KeeperStorageSnapshot; class KeeperStorage { public: - struct Node { uint64_t acl_id = 0; /// 0 -- no ACL by default @@ -41,26 
+40,18 @@ public: Node() : size_bytes(sizeof(Node)) { } /// Object memory size - uint64_t sizeInBytes() const - { - return size_bytes; - } + uint64_t sizeInBytes() const { return size_bytes; } void setData(String new_data); - const auto & getData() const noexcept - { - return data; - } + const auto & getData() const noexcept { return data; } void addChild(StringRef child_path); void removeChild(StringRef child_path); - const auto & getChildren() const noexcept - { - return children; - } + const auto & getChildren() const noexcept { return children; } + private: String data; ChildrenSet children{}; @@ -85,10 +76,7 @@ public: std::string scheme; std::string id; - bool operator==(const AuthID & other) const - { - return scheme == other.scheme && id == other.id; - } + bool operator==(const AuthID & other) const { return scheme == other.scheme && id == other.id; } }; using RequestsForSessions = std::vector; @@ -112,7 +100,7 @@ public: /// container. Container container; - struct CreateNodeDelta + struct CreateNodeDelta { Coordination::Stat stat; bool is_ephemeral; @@ -152,27 +140,18 @@ public: { }; - using Operation = std::variant; + using Operation + = std::variant; struct Delta { - Delta(String path_, int64_t zxid_, Operation operation_) - : path(std::move(path_)) - , zxid(zxid_) - , operation(std::move(operation_)) - {} + Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { } - Delta(int64_t zxid_, Coordination::Error error) - : Delta("", zxid_, ErrorDelta{error}) - {} + Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { } - Delta(int64_t zxid_, SubDeltaEnd subdelta) - : Delta("", zxid_, subdelta) - {} + Delta(int64_t zxid_, SubDeltaEnd subdelta) : Delta("", zxid_, subdelta) { } - Delta(int64_t zxid_, FailedMultiDelta failed_multi) - : Delta("", zxid_, failed_multi) - {} + Delta(int64_t zxid_, FailedMultiDelta failed_multi) : Delta("", zxid_, 
failed_multi) { } String path; int64_t zxid; @@ -181,7 +160,7 @@ public: struct CurrentNodes { - explicit CurrentNodes(KeeperStorage & storage_) : storage(storage_) {} + explicit CurrentNodes(KeeperStorage & storage_) : storage(storage_) { } template void applyDeltas(StringRef path, const Visitor & visitor) const @@ -206,7 +185,14 @@ public: Coordination::Error commit(int64_t zxid, int64_t session_id); - bool createNode(const std::string & path, String data, const Coordination::Stat & stat, bool is_sequental, bool is_ephemeral, Coordination::ACLs node_acls, int64_t session_id); + bool createNode( + const std::string & path, + String data, + const Coordination::Stat & stat, + bool is_sequental, + bool is_ephemeral, + Coordination::ACLs node_acls, + int64_t session_id); bool removeNode(const std::string & path, int32_t version); /// Mapping session_id -> set of ephemeral nodes paths @@ -228,15 +214,12 @@ public: /// Currently active watches (node_path -> subscribed sessions) Watches watches; - Watches list_watches; /// Watches for 'list' request (watches on children). + Watches list_watches; /// Watches for 'list' request (watches on children). void clearDeadWatches(int64_t session_id); /// Get current zxid - int64_t getZXID() const - { - return zxid; - } + int64_t getZXID() const { return zxid; } const String superdigest; @@ -260,7 +243,13 @@ public: /// Process user request and return response. /// check_acl = false only when converting data from ZooKeeper. 
- ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl = true); + ResponsesForSessions processRequest( + const Coordination::ZooKeeperRequestPtr & request, + int64_t session_id, + int64_t time, + std::optional new_last_zxid, + bool check_acl = true, + bool is_local = false); void preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid); void finalize(); @@ -268,70 +257,37 @@ public: /// Set of methods for creating snapshots /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. - void enableSnapshotMode(size_t up_to_version) - { - container.enableSnapshotMode(up_to_version); - } + void enableSnapshotMode(size_t up_to_version) { container.enableSnapshotMode(up_to_version); } /// Turn off snapshot mode. - void disableSnapshotMode() - { - container.disableSnapshotMode(); - } + void disableSnapshotMode() { container.disableSnapshotMode(); } - Container::const_iterator getSnapshotIteratorBegin() const - { - return container.begin(); - } + Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); } /// Clear outdated data from internal container. 
- void clearGarbageAfterSnapshot() - { - container.clearOutdatedNodes(); - } + void clearGarbageAfterSnapshot() { container.clearOutdatedNodes(); } /// Get all active sessions - const SessionAndTimeout & getActiveSessions() const - { - return session_and_timeout; - } + const SessionAndTimeout & getActiveSessions() const { return session_and_timeout; } /// Get all dead sessions - std::vector getDeadSessions() const - { - return session_expiry_queue.getExpiredSessions(); - } + std::vector getDeadSessions() const { return session_expiry_queue.getExpiredSessions(); } /// Introspection functions mostly used in 4-letter commands - uint64_t getNodesCount() const - { - return container.size(); - } + uint64_t getNodesCount() const { return container.size(); } - uint64_t getApproximateDataSize() const - { - return container.getApproximateDataSize(); - } + uint64_t getApproximateDataSize() const { return container.getApproximateDataSize(); } - uint64_t getArenaDataSize() const - { - return container.keyArenaSize(); - } + uint64_t getArenaDataSize() const { return container.keyArenaSize(); } uint64_t getTotalWatchesCount() const; - uint64_t getWatchedPathsCount() const - { - return watches.size() + list_watches.size(); - } + uint64_t getWatchedPathsCount() const { return watches.size() + list_watches.size(); } uint64_t getSessionsWithWatchesCount() const; - uint64_t getSessionWithEphemeralNodesCount() const - { - return ephemerals.size(); - } + uint64_t getSessionWithEphemeralNodesCount() const { return ephemerals.size(); } uint64_t getTotalEphemeralNodesCount() const; void dumpWatches(WriteBufferFromOwnString & buf) const; From 9796527890fd05675a33325c764301b9d1712efd Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 09:16:05 +0000 Subject: [PATCH 067/615] Support Auth check --- src/Coordination/KeeperStorage.cpp | 193 +++++++++++++++-------------- src/Coordination/KeeperStorage.h | 4 +- 2 files changed, 106 insertions(+), 91 deletions(-) diff --git 
a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 2f7f4357f8c..fd065bd1249 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -269,6 +269,35 @@ bool KeeperStorage::CurrentNodes::hasNode(StringRef path) const return exists; } +Coordination::ACLs KeeperStorage::CurrentNodes::getACLs(StringRef path) const +{ + std::optional acl_id; + if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) + acl_id.emplace(maybe_node_it->value.acl_id); + + const Coordination::ACLs * acls{nullptr}; + applyDeltas( + path, + Overloaded{ + [&](const CreateNodeDelta & create_delta) + { + assert(!acl_id); + acls = &create_delta.acls; + }, + [&](const SetACLDelta & set_acl_delta) + { + assert(acl_id || acls); + acls = &set_acl_delta.acls; + }, + [&](auto && /*delta*/) {}, + }); + + if (acls) + return *acls; + + return acl_id ? storage.acl_map.convertNumber(*acl_id) : Coordination::ACLs{}; +} + namespace { [[noreturn]] void fail() @@ -304,11 +333,11 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i }, [&, &path = delta.path](KeeperStorage::UpdateNodeDelta & update_delta) { - auto it = container.find(path); - if (it == container.end()) + auto node_it = container.find(path); + if (node_it == container.end()) fail(); - if (update_delta.version != -1 && update_delta.version != it->value.stat.version) + if (update_delta.version != -1 && update_delta.version != node_it->value.stat.version) fail(); container.updateValue(path, update_delta.update_fn); @@ -323,14 +352,14 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i }, [&, &path = delta.path](KeeperStorage::SetACLDelta & acl_delta) { - auto it = container.find(path); - if (it != container.end()) + auto node_it = container.find(path); + if (node_it != container.end()) fail(); - if (acl_delta.version != -1 && acl_delta.version != it->value.stat.aversion) + if 
(acl_delta.version != -1 && acl_delta.version != node_it->value.stat.aversion) fail(); - acl_map.removeUsage(it->value.acl_id); + acl_map.removeUsage(node_it->value.acl_id); uint64_t acl_id = acl_map.convertACLs(acl_delta.acls); acl_map.addUsage(acl_id); @@ -372,12 +401,12 @@ bool KeeperStorage::createNode( int64_t session_id) { auto parent_path = parentPath(path); - auto it = container.find(parent_path); + auto node_it = container.find(parent_path); - if (it == container.end()) + if (node_it == container.end()) return false; - if (it->value.stat.ephemeralOwner != 0) + if (node_it->value.stat.ephemeralOwner != 0) return false; if (container.contains(path)) @@ -406,17 +435,17 @@ bool KeeperStorage::createNode( bool KeeperStorage::removeNode(const std::string & path, int32_t version) { - auto it = container.find(path); - if (it == container.end()) + auto node_it = container.find(path); + if (node_it == container.end()) return false; - if (version != -1 && version != it->value.stat.version) + if (version != -1 && version != node_it->value.stat.version) return false; - if (it->value.stat.numChildren) + if (node_it->value.stat.numChildren) return false; - auto prev_node = it->value; + auto prev_node = node_it->value; if (prev_node.stat.ephemeralOwner != 0) { auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); @@ -428,7 +457,8 @@ bool KeeperStorage::removeNode(const std::string & path, int32_t version) acl_map.removeUsage(prev_node.acl_id); container.updateValue( - parentPath(path), [child_basename = getBaseName(it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); }); + parentPath(path), + [child_basename = getBaseName(node_it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); }); container.erase(path); return true; @@ -459,7 +489,7 @@ struct KeeperStorageRequestProcessor { return {}; } - virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; } + virtual bool 
checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } virtual ~KeeperStorageRequestProcessor() = default; }; @@ -489,6 +519,21 @@ struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProc namespace { + + Coordination::ACLs getACLs(KeeperStorage & storage, StringRef path, bool is_local) + { + if (is_local) + { + auto node_it = storage.container.find(path); + if (node_it == storage.container.end()) + return {}; + + return storage.acl_map.convertNumber(node_it->value.acl_id); + } + + return storage.current_nodes.getACLs(path); + } + } struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor @@ -501,17 +546,12 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); } - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; auto path = zk_request->getPath(); auto parent_path = parentPath(path); - auto node_it = container.find(parent_path); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parent_path, is_local); if (node_acls.empty()) return true; @@ -619,14 +659,9 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto node_it = container.find(zk_request->getPath()); - if (node_it == container.end()) - return true; - - const auto & 
node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, zk_request->getPath(), is_local); if (node_acls.empty()) return true; @@ -701,14 +736,9 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto node_it = container.find(parentPath(zk_request->getPath())); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); if (node_acls.empty()) return true; @@ -853,14 +883,9 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto node_it = container.find(zk_request->getPath()); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); if (node_acls.empty()) return true; @@ -937,14 +962,9 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = 
storage.container; - auto node_it = container.find(zk_request->getPath()); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); if (node_acls.empty()) return true; @@ -1027,14 +1047,9 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto node_it = container.find(zk_request->getPath()); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); if (node_acls.empty()) return true; @@ -1114,14 +1129,9 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto node_it = container.find(zk_request->getPath()); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); if (node_acls.empty()) return true; @@ -1180,14 +1190,9 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, 
int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto node_it = container.find(zk_request->getPath()); - if (node_it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(node_it->value.acl_id); + const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); if (node_acls.empty()) return true; @@ -1260,10 +1265,10 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { for (const auto & concrete_request : concrete_requests) - if (!concrete_request->checkAuth(storage, session_id)) + if (!concrete_request->checkAuth(storage, session_id, is_local)) return false; return true; } @@ -1542,11 +1547,19 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() void KeeperStorage::preprocessRequest( - const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid) + const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl) { current_nodes.current_zxid = new_last_zxid; - KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(request); + KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + + if (check_acl && !request_processor->checkAuth(*this, session_id, false)) + { + current_nodes.deltas.push_back(Delta{new_last_zxid, Coordination::Error::ZNOAUTH}); + return; + } + auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); + 
current_nodes.deltas.insert( current_nodes.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); } @@ -1573,10 +1586,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { - auto it = ephemerals.find(session_id); - if (it != ephemerals.end()) + auto session_ephemerals = ephemerals.find(session_id); + if (session_ephemerals != ephemerals.end()) { - for (const auto & ephemeral_path : it->second) + for (const auto & ephemeral_path : session_ephemerals->second) { container.updateValue( parentPath(ephemeral_path), @@ -1593,7 +1606,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); results.insert(results.end(), responses.begin(), responses.end()); } - ephemerals.erase(it); + ephemerals.erase(session_ephemerals); } clearDeadWatches(session_id); auto auth_it = session_and_auth.find(session_id); @@ -1622,19 +1635,19 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); Coordination::ZooKeeperResponsePtr response; - if (check_acl && !request_processor->checkAuth(*this, session_id)) + if (is_local) { - response = zk_request->makeResponse(); - /// Original ZooKeeper always throws no auth, even when user provided some credentials - response->error = Coordination::Error::ZNOAUTH; + if (check_acl && !request_processor->checkAuth(*this, session_id, true)) + { + response = zk_request->makeResponse(); + /// Original ZooKeeper always throws no auth, even when user provided some credentials + response->error = Coordination::Error::ZNOAUTH; + } + response = request_processor->processLocal(*this, zxid, session_id, time); } else { - if (is_local) - response = 
request_processor->processLocal(*this, zxid, session_id, time); - else - response = request_processor->process(*this, zxid, session_id, time); - + response = request_processor->process(*this, zxid, session_id, time); std::erase_if(current_nodes.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index e25539c7bad..2fa87328dfd 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -174,6 +174,7 @@ public: std::shared_ptr getNode(StringRef path); bool hasNode(StringRef path) const; + Coordination::ACLs getACLs(StringRef path) const; std::unordered_map> node_to_deltas; std::deque deltas; @@ -250,7 +251,8 @@ public: std::optional new_last_zxid, bool check_acl = true, bool is_local = false); - void preprocessRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid); + void preprocessRequest( + const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl = true); void finalize(); From 285bb21b919de27236f3505f0b2a4e7d1faa3550 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 09:35:16 +0000 Subject: [PATCH 068/615] Define close session --- src/Coordination/KeeperStorage.cpp | 57 ++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index fd065bd1249..8b12b59e50b 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1552,14 +1552,37 @@ void KeeperStorage::preprocessRequest( current_nodes.current_zxid = new_last_zxid; KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special + { + auto & deltas = 
current_nodes.deltas; + auto session_ephemerals = ephemerals.find(session_id); + if (session_ephemerals != ephemerals.end()) + { + for (const auto & ephemeral_path : session_ephemerals->second) + { + if (current_nodes.hasNode(ephemeral_path)) + { + deltas.emplace_back(parentPath(ephemeral_path).toString(), new_last_zxid, UpdateNodeDelta{[ephemeral_path](Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + }}); + + deltas.emplace_back(ephemeral_path, new_last_zxid, RemoveNodeDelta()); + } + } + } + + return; + } + if (check_acl && !request_processor->checkAuth(*this, session_id, false)) { - current_nodes.deltas.push_back(Delta{new_last_zxid, Coordination::Error::ZNOAUTH}); + current_nodes.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH); return; } auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); - current_nodes.deltas.insert( current_nodes.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); } @@ -1586,28 +1609,24 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { - auto session_ephemerals = ephemerals.find(session_id); - if (session_ephemerals != ephemerals.end()) + ephemerals.erase(session_id); + + commit(zxid, session_id); + + for (const auto & delta : current_nodes.deltas) { - for (const auto & ephemeral_path : session_ephemerals->second) + if (delta.zxid > zxid) + break; + + if (std::holds_alternative(delta.operation)) { - container.updateValue( - parentPath(ephemeral_path), - [&ephemeral_path](KeeperStorage::Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - auto base_name = getBaseName(ephemeral_path); - parent.removeChild(base_name); - }); - - container.erase(ephemeral_path); - - auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); + auto responses 
= processWatchesImpl(delta.path, watches, list_watches, Coordination::Event::DELETED); results.insert(results.end(), responses.begin(), responses.end()); } - ephemerals.erase(session_ephemerals); } + + std::erase_if(current_nodes.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); + clearDeadWatches(session_id); auto auth_it = session_and_auth.find(session_id); if (auth_it != session_and_auth.end()) From 246a5043b42b65f6ff6580d1e47d2489c108e081 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 09:42:23 +0000 Subject: [PATCH 069/615] Rewrite commit with single lambda --- src/Coordination/KeeperStorage.cpp | 66 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 8b12b59e50b..9f342c92ad7 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -316,69 +316,72 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i bool finish_subdelta = false; auto result = std::visit( - Overloaded{ - [&, &path = delta.path](KeeperStorage::CreateNodeDelta & create_delta) + [&, &path = delta.path](const DeltaType & operation) -> Coordination::Error + { + if constexpr (std::same_as) { if (!createNode( path, - std::move(create_delta.data), - create_delta.stat, - create_delta.is_sequental, - create_delta.is_ephemeral, - std::move(create_delta.acls), + std::move(operation.data), + operation.stat, + operation.is_sequental, + operation.is_ephemeral, + std::move(operation.acls), session_id)) fail(); return Coordination::Error::ZOK; - }, - [&, &path = delta.path](KeeperStorage::UpdateNodeDelta & update_delta) + } + else if constexpr (std::same_as) { auto node_it = container.find(path); if (node_it == container.end()) fail(); - if (update_delta.version != -1 && update_delta.version != node_it->value.stat.version) + if (operation.version != -1 && operation.version != 
node_it->value.stat.version) fail(); - container.updateValue(path, update_delta.update_fn); + container.updateValue(path, operation.update_fn); return Coordination::Error::ZOK; - }, - [&, &path = delta.path](KeeperStorage::RemoveNodeDelta & remove_delta) + } + else if constexpr (std::same_as) { - if (!removeNode(path, remove_delta.version)) + if (!removeNode(path, operation.version)) fail(); return Coordination::Error::ZOK; - }, - [&, &path = delta.path](KeeperStorage::SetACLDelta & acl_delta) + } + else if constexpr (std::same_as) { auto node_it = container.find(path); if (node_it != container.end()) fail(); - if (acl_delta.version != -1 && acl_delta.version != node_it->value.stat.aversion) + if (operation.version != -1 && operation.version != node_it->value.stat.aversion) fail(); acl_map.removeUsage(node_it->value.acl_id); - uint64_t acl_id = acl_map.convertACLs(acl_delta.acls); + uint64_t acl_id = acl_map.convertACLs(operation.acls); acl_map.addUsage(acl_id); container.updateValue(path, [acl_id](KeeperStorage::Node & node) { node.acl_id = acl_id; }); return Coordination::Error::ZOK; - }, - [&](KeeperStorage::ErrorDelta & error_delta) { return error_delta.error; }, - [&](KeeperStorage::SubDeltaEnd &) + } + else if constexpr (std::same_as) + return operation.error; + else if constexpr (std::same_as) { finish_subdelta = true; return Coordination::Error::ZOK; - }, - [&](KeeperStorage::FailedMultiDelta &) -> Coordination::Error + } + else { - // this shouldn't be called in any process functions + // shouldn't be called in any process functions fail(); - }}, + } + }, delta.operation); if (result != Coordination::Error::ZOK) @@ -1562,11 +1565,14 @@ void KeeperStorage::preprocessRequest( { if (current_nodes.hasNode(ephemeral_path)) { - deltas.emplace_back(parentPath(ephemeral_path).toString(), new_last_zxid, UpdateNodeDelta{[ephemeral_path](Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - }}); + deltas.emplace_back( + 
parentPath(ephemeral_path).toString(), + new_last_zxid, + UpdateNodeDelta{[ephemeral_path](Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + }}); deltas.emplace_back(ephemeral_path, new_last_zxid, RemoveNodeDelta()); } From 7c7bac180fa18557068beed71b797b8b7f2dea34 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 09:45:03 +0000 Subject: [PATCH 070/615] Rename variables with short names --- src/Coordination/KeeperStorage.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 9f342c92ad7..142655d8c40 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -51,7 +51,7 @@ namespace String generateDigest(const String & userdata) { std::vector user_password; - boost::split(user_password, userdata, [](char c) { return c == ':'; }); + boost::split(user_password, userdata, [](char character) { return character == ':'; }); return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } @@ -128,8 +128,8 @@ namespace const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) { KeeperStorage::ResponsesForSessions result; - auto it = watches.find(path); - if (it != watches.end()) + auto watch_it = watches.find(path); + if (watch_it != watches.end()) { std::shared_ptr watch_response = std::make_shared(); watch_response->path = path; @@ -137,10 +137,10 @@ namespace watch_response->zxid = -1; watch_response->type = event_type; watch_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) + for (auto watcher_session : watch_it->second) result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); - watches.erase(it); + watches.erase(watch_it); } auto parent_path = parentPath(path); @@ -159,8 +159,8 @@ namespace for (const auto & path_to_check : 
paths_to_check_for_list_watches) { - it = list_watches.find(path_to_check); - if (it != list_watches.end()) + watch_it = list_watches.find(path_to_check); + if (watch_it != list_watches.end()) { std::shared_ptr watch_list_response = std::make_shared(); @@ -173,10 +173,10 @@ namespace watch_list_response->type = Coordination::Event::DELETED; watch_list_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) + for (auto watcher_session : watch_it->second) result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); - list_watches.erase(it); + list_watches.erase(watch_it); } } return result; @@ -1503,11 +1503,11 @@ public: KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const { - auto it = op_num_to_request.find(zk_request->getOpNum()); - if (it == op_num_to_request.end()) + auto request_it = op_num_to_request.find(zk_request->getOpNum()); + if (request_it == op_num_to_request.end()) throw DB::Exception("Unknown operation type " + toString(zk_request->getOpNum()), ErrorCodes::LOGICAL_ERROR); - return it->second(zk_request); + return request_it->second(zk_request); } void registerRequest(Coordination::OpNum op_num, Creator creator) From f5bdef44352ce49751efe3875b562c08f984abd7 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 09:57:06 +0000 Subject: [PATCH 071/615] Remove indentation --- src/Coordination/KeeperStorage.cpp | 255 +++++++++++++++-------------- 1 file changed, 128 insertions(+), 127 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 142655d8c40..b353e70e98d 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -29,158 +29,159 @@ namespace ErrorCodes namespace { - String base64Encode(const String & decoded) - { - std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - ostr.exceptions(std::ios::failbit); - Poco::Base64Encoder 
encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << decoded; - encoder.close(); - return ostr.str(); - } +String base64Encode(const String & decoded) +{ + std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + ostr.exceptions(std::ios::failbit); + Poco::Base64Encoder encoder(ostr); + encoder.rdbuf()->setLineLength(0); + encoder << decoded; + encoder.close(); + return ostr.str(); +} - String getSHA1(const String & userdata) - { - Poco::SHA1Engine engine; - engine.update(userdata); - const auto & digest_id = engine.digest(); - return String{digest_id.begin(), digest_id.end()}; - } +String getSHA1(const String & userdata) +{ + Poco::SHA1Engine engine; + engine.update(userdata); + const auto & digest_id = engine.digest(); + return String{digest_id.begin(), digest_id.end()}; +} - String generateDigest(const String & userdata) - { - std::vector user_password; - boost::split(user_password, userdata, [](char character) { return character == ':'; }); - return user_password[0] + ":" + base64Encode(getSHA1(userdata)); - } +String generateDigest(const String & userdata) +{ + std::vector user_password; + boost::split(user_password, userdata, [](char character) { return character == ':'; }); + return user_password[0] + ":" + base64Encode(getSHA1(userdata)); +} - bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) - { - if (node_acls.empty()) +bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) +{ + if (node_acls.empty()) + return true; + + for (const auto & session_auth : session_auths) + if (session_auth.scheme == "super") return true; - for (const auto & session_auth : session_auths) - if (session_auth.scheme == "super") + for (const auto & node_acl : node_acls) + { + if (node_acl.permissions & permission) + { + if (node_acl.scheme == "world" && node_acl.id == "anyone") return true; - for (const auto & node_acl : node_acls) - { - if 
(node_acl.permissions & permission) + for (const auto & session_auth : session_auths) { - if (node_acl.scheme == "world" && node_acl.id == "anyone") + if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) return true; - - for (const auto & session_auth : session_auths) - { - if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) - return true; - } } } - - return false; } - bool fixupACL( - const std::vector & request_acls, - const std::vector & current_ids, - std::vector & result_acls) + return false; +} + +bool fixupACL( + const std::vector & request_acls, + const std::vector & current_ids, + std::vector & result_acls) +{ + if (request_acls.empty()) + return true; + + bool valid_found = false; + for (const auto & request_acl : request_acls) { - if (request_acls.empty()) - return true; - - bool valid_found = false; - for (const auto & request_acl : request_acls) + if (request_acl.scheme == "auth") { - if (request_acl.scheme == "auth") + for (const auto & current_id : current_ids) { - for (const auto & current_id : current_ids) - { - valid_found = true; - Coordination::ACL new_acl = request_acl; - new_acl.scheme = current_id.scheme; - new_acl.id = current_id.id; - result_acls.push_back(new_acl); - } - } - else if (request_acl.scheme == "world" && request_acl.id == "anyone") - { - /// We don't need to save default ACLs valid_found = true; - } - else if (request_acl.scheme == "digest") - { Coordination::ACL new_acl = request_acl; - - /// Bad auth - if (std::count(new_acl.id.begin(), new_acl.id.end(), ':') != 1) - return false; - - valid_found = true; + new_acl.scheme = current_id.scheme; + new_acl.id = current_id.id; result_acls.push_back(new_acl); } } - return valid_found; - } + else if (request_acl.scheme == "world" && request_acl.id == "anyone") + { + /// We don't need to save default ACLs + valid_found = true; + } + else if (request_acl.scheme == "digest") + { + Coordination::ACL new_acl = request_acl; - 
KeeperStorage::ResponsesForSessions processWatchesImpl( - const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) + /// Bad auth + if (std::count(new_acl.id.begin(), new_acl.id.end(), ':') != 1) + return false; + + valid_found = true; + result_acls.push_back(new_acl); + } + } + return valid_found; +} + +KeeperStorage::ResponsesForSessions processWatchesImpl( + const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) +{ + KeeperStorage::ResponsesForSessions result; + auto watch_it = watches.find(path); + if (watch_it != watches.end()) { - KeeperStorage::ResponsesForSessions result; - auto watch_it = watches.find(path); - if (watch_it != watches.end()) - { - std::shared_ptr watch_response = std::make_shared(); - watch_response->path = path; - watch_response->xid = Coordination::WATCH_XID; - watch_response->zxid = -1; - watch_response->type = event_type; - watch_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : watch_it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); + std::shared_ptr watch_response = std::make_shared(); + watch_response->path = path; + watch_response->xid = Coordination::WATCH_XID; + watch_response->zxid = -1; + watch_response->type = event_type; + watch_response->state = Coordination::State::CONNECTED; + for (auto watcher_session : watch_it->second) + result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); - watches.erase(watch_it); - } - - auto parent_path = parentPath(path); - - Strings paths_to_check_for_list_watches; - if (event_type == Coordination::Event::CREATED) - { - paths_to_check_for_list_watches.push_back(parent_path.toString()); /// Trigger list watches for parent - } - else if (event_type == Coordination::Event::DELETED) - { - paths_to_check_for_list_watches.push_back(path); /// Trigger both 
list watches for this path - paths_to_check_for_list_watches.push_back(parent_path.toString()); /// And for parent path - } - /// CHANGED event never trigger list wathes - - for (const auto & path_to_check : paths_to_check_for_list_watches) - { - watch_it = list_watches.find(path_to_check); - if (watch_it != list_watches.end()) - { - std::shared_ptr watch_list_response - = std::make_shared(); - watch_list_response->path = path_to_check; - watch_list_response->xid = Coordination::WATCH_XID; - watch_list_response->zxid = -1; - if (path_to_check == parent_path) - watch_list_response->type = Coordination::Event::CHILD; - else - watch_list_response->type = Coordination::Event::DELETED; - - watch_list_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : watch_it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); - - list_watches.erase(watch_it); - } - } - return result; + watches.erase(watch_it); } + + auto parent_path = parentPath(path); + + Strings paths_to_check_for_list_watches; + if (event_type == Coordination::Event::CREATED) + { + paths_to_check_for_list_watches.push_back(parent_path.toString()); /// Trigger list watches for parent + } + else if (event_type == Coordination::Event::DELETED) + { + paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path + paths_to_check_for_list_watches.push_back(parent_path.toString()); /// And for parent path + } + /// CHANGED event never trigger list wathes + + for (const auto & path_to_check : paths_to_check_for_list_watches) + { + watch_it = list_watches.find(path_to_check); + if (watch_it != list_watches.end()) + { + std::shared_ptr watch_list_response + = std::make_shared(); + watch_list_response->path = path_to_check; + watch_list_response->xid = Coordination::WATCH_XID; + watch_list_response->zxid = -1; + if (path_to_check == parent_path) + watch_list_response->type = Coordination::Event::CHILD; + else + 
watch_list_response->type = Coordination::Event::DELETED; + + watch_list_response->state = Coordination::State::CONNECTED; + for (auto watcher_session : watch_it->second) + result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + + list_watches.erase(watch_it); + } + } + return result; +} + } void KeeperStorage::Node::setData(String new_data) From a89b57aeb65e72f08fa0157a090edb318e89cf18 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 10:19:20 +0000 Subject: [PATCH 072/615] Fix commit --- src/Coordination/KeeperStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index b353e70e98d..2a123e6a352 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -370,7 +370,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) return operation.error; else if constexpr (std::same_as) { From 04fdd75c5627b36e71cdcf1d1fc75c1aa746785b Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 9 May 2022 11:13:44 +0000 Subject: [PATCH 073/615] Make JSONColumns frormats mono block by default --- docs/en/interfaces/formats.md | 24 ++-- docs/en/operations/settings/settings.md | 6 - src/Core/Settings.h | 2 - src/Formats/FormatFactory.cpp | 1 - src/Formats/FormatSettings.h | 5 - src/Processors/Chunk.cpp | 4 +- src/Processors/Chunk.h | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 3 + .../Formats/Impl/CHColumnToArrowColumn.cpp | 1 + .../Impl/JSONColumnsBaseBlockInputFormat.cpp | 4 +- .../Impl/JSONColumnsBaseBlockOutputFormat.cpp | 26 +--- .../Impl/JSONColumnsBaseBlockOutputFormat.h | 10 +- .../Impl/JSONColumnsBlockOutputFormat.cpp | 23 ++-- .../Impl/JSONColumnsBlockOutputFormat.h | 6 +- ...ONColumnsWithMetadataBlockOutputFormat.cpp | 10 +- 
...JSONColumnsWithMetadataBlockOutputFormat.h | 5 +- .../JSONCompactColumnsBlockOutputFormat.cpp | 23 ++-- .../JSONCompactColumnsBlockOutputFormat.h | 7 +- .../Impl/PrettyCompactBlockOutputFormat.cpp | 2 +- .../02293_formats_json_columns.reference | 120 ------------------ .../0_stateless/02293_formats_json_columns.sh | 26 ---- 21 files changed, 59 insertions(+), 251 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d4a68ca72fa..3119827998f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -31,12 +31,10 @@ The supported formats are: | [JSONAsString](#jsonasstring) | ✔ | ✗ | | [JSONStrings](#jsonstrings) | ✗ | ✔ | | [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsMonoBlock](#jsoncolumnsmonoblock) | ✗ | ✔ | | [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | | [JSONCompact](#jsoncompact) | ✗ | ✔ | | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | | [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | -| [JSONCompactColumnsMonoBlock](#jsoncompactcolumnsmonoblock) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | | [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | @@ -572,8 +570,10 @@ Example: ## JSONColumns {#jsoncolumns} -In this format, each block of data is represented as a JSON Object: +In this format, all data is represented as a single JSON Object. +Note that JSONColumns output format buffers all data in memory to output it as a single block. +Example: ```json { "name1": [1, 2, 3, 4], @@ -585,16 +585,13 @@ In this format, each block of data is represented as a JSON Object: Columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. 
Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) -## JSONColumnsMonoBlock {#jsoncolumnsmonoblock} - -Differs from JSONColumns in that it buffers up to [output_format_json_columns_max_rows_to_buffer](../operations/settings/settings.md#output-format-json-columns-max-rows-to-buffer) -rows and then outputs them as a single block. ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} -Differs from JSON output format in that it outputs columns as in JSONColumns format. This format buffers up to [output_format_json_columns_max_rows_to_buffer](../operations/settings/settings.md#session_settings-output-format-json-columns-max-rows-to-buffer) -rows and then outputs them as a single block. +Differs from JSON output format in that it outputs columns as in JSONColumns format. +This format buffers all data in memory and then outputs them as a single block. +Example: ```json { "meta": @@ -766,8 +763,10 @@ Example: ## JSONCompactColumns {#jsoncompactcolumns} -In this format, each block of data is represented as a JSON array of arrays: +In this format, all data is represented as a single JSON Array. +Note that JSONCompactColumns output format buffers all data in memory to output it as a single block. 
+Example: ```json [ [1, 2, 3, 4], @@ -778,11 +777,6 @@ In this format, each block of data is represented as a JSON array of arrays: Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) -## JSONCompactColumnsMonoBlock {#jsoncompactcolumnsmonoblock} - -Differs from JSONCompactColumns in that it buffers up to [output_format_json_columns_max_rows_to_buffer](../operations/settings/settings.md#session_settings-output-format-json-columns-max-rows-to-buffer) -rows and then outputs them as a single block. - ## JSONEachRow {#jsoneachrow} ## JSONStringsEachRow {#jsonstringseachrow} ## JSONCompactEachRow {#jsoncompacteachrow} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ef52ab415bf..8f2b9bc86fc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3784,12 +3784,6 @@ Possible values: Default value: `0`. -## output_format_json_columns_max_rows_to_buffer {#output-format-json-columns-max-rows-to-buffer} - -The maximum rows to buffer in formats JSONColumnsMonoBlock/JSONCompactColumnsMonoBlock/JSONColumnsWithMetadata - -Default value: `10000`. - ## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 958eba86a77..be73465eea0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -730,8 +730,6 @@ class IColumn; \ M(String, input_format_mysql_dump_table_name, "", "Name of the table in MySQL dump from which to read data", 0) \ M(Bool, input_format_mysql_dump_map_column_names, true, "Match columns from table in MySQL dump and columns from ClickHouse table by names", 0) \ - \ - M(UInt64, output_format_json_columns_max_rows_to_buffer, 10000, "Max rows to buffer in JSONColumnsMonoBlock, JSONCompactColumnsMonoBlock and JSONColumnsWithMetadata format", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d70200aac4a..96b52cd2423 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -152,7 +152,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; - format_settings.json_columns.max_rows_to_buffer = settings.output_format_json_columns_max_rows_to_buffer; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 13556244410..4f77fe099e1 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -268,11 +268,6 @@ struct FormatSettings String table_name; bool map_column_names = true; } mysql_dump; - - struct - { - size_t max_rows_to_buffer = 10000; - } json_columns; }; } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 
7f46a4140de..61bd118636d 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -169,13 +169,13 @@ std::string Chunk::dumpStructure() const return out.str(); } -void Chunk::append(const Chunk & chunk, size_t length) +void Chunk::append(const Chunk & chunk) { MutableColumns mutation = mutateColumns(); for (size_t position = 0; position < mutation.size(); ++position) { auto column = chunk.getColumns()[position]; - mutation[position]->insertRangeFrom(*column, 0, std::min(length, column->size())); + mutation[position]->insertRangeFrom(*column, 0, column->size()); } size_t rows = mutation[0]->size(); setColumns(std::move(mutation), rows); diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index ddf3971fb43..0c0869b336a 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -101,7 +101,7 @@ public: std::string dumpStructure() const; - void append(const Chunk & chunk, size_t length); + void append(const Chunk & chunk); private: Columns columns; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index c792d828e44..7449dc75c8b 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -36,6 +36,8 @@ #include #include +#include + /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. 
#define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ @@ -473,6 +475,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_indexes_column = std::make_shared(indexes_array); auto indexes_column = readColumnWithIndexesData(arrow_indexes_column); + LOG_DEBUG(&Poco::Logger::get("Arrow"), "Indexes types: {} {}", arrow_indexes_column->type()->name(), indexes_column->getName()); auto lc_column = ColumnLowCardinality::create(dict_values->column, indexes_column); auto lc_type = std::make_shared(dict_values->type); return {std::move(lc_column), std::move(lc_type), column_name}; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 043e4f1e724..f4f82e81234 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -308,6 +308,7 @@ namespace DB } status = builder->AppendIndices(indexes.data(), indexes.size(), arrow_null_bytemap_raw_ptr); + std::cerr << assert_cast(builder->type().get())->index_type()->name() << "\n"; checkStatus(status, column->getName(), format_name); } diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp index ae3b0962dc8..f7cd3c5ec6d 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp @@ -153,8 +153,8 @@ Chunk JSONColumnsBaseBlockInputFormat::generate() if (rows <= 0) return Chunk(std::move(columns), 0); - /// Insert defaults in columns that were not presented in current block and fill - ///block_missing_values accordingly if setting input_format_defaults_for_omitted_fields is enabled + /// Insert defaults in columns that were not presented in block and fill + /// block_missing_values accordingly if setting input_format_defaults_for_omitted_fields is enabled for (size_t i = 0; i != 
seen_columns.size(); ++i) { if (!seen_columns[i]) diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp index c998b95067c..bd920bd6367 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp @@ -7,46 +7,30 @@ namespace DB { JSONColumnsBaseBlockOutputFormat::JSONColumnsBaseBlockOutputFormat( - WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_) + WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_) , format_settings(format_settings_) - , mono_block(mono_block_) , serializations(header_.getSerializations()) , ostr(&out) - , max_rows_in_mono_block(format_settings_.json_columns.max_rows_to_buffer) { } void JSONColumnsBaseBlockOutputFormat::consume(Chunk chunk) { - if (!mono_block) - { - writeChunk(chunk); - return; - } - if (!mono_chunk) { mono_chunk = std::move(chunk); - total_rows_in_mono_block = mono_chunk.getNumRows(); return; } - /// Copy up to (max_rows_in_mono_block - total_rows_in_mono_block) rows. 
- size_t length = chunk.getNumRows(); - if (total_rows_in_mono_block + length > max_rows_in_mono_block) - length = max_rows_in_mono_block - total_rows_in_mono_block; - mono_chunk.append(chunk, length); - total_rows_in_mono_block += length; + mono_chunk.append(chunk); } void JSONColumnsBaseBlockOutputFormat::writeSuffix() { - if (mono_chunk) - { - writeChunk(mono_chunk); - mono_chunk.clear(); - } + + writeChunk(mono_chunk); + mono_chunk.clear(); } void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk) diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h index 2c680c9e4cd..c7bb0edb207 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h @@ -11,13 +11,11 @@ namespace DB class WriteBuffer; /// Base class for Columnar JSON output formats. -/// It outputs data block by block. If mono_block_ argument is true, -/// it will buffer up to output_format_json_columns_max_rows_to_buffer rows -/// and outputs them as a single block in writeSuffix() method. +/// It buffers all data and outputs it as a single block in writeSuffix() method. 
class JSONColumnsBaseBlockOutputFormat : public IOutputFormat { public: - JSONColumnsBaseBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_); + JSONColumnsBaseBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "JSONColumnsBaseBlockOutputFormat"; } @@ -34,15 +32,11 @@ protected: void writeColumnEnd(bool is_last); const FormatSettings format_settings; - bool mono_block; Serializations serializations; WriteBuffer * ostr; - /// For mono_block == true only Chunk mono_chunk; - size_t max_rows_in_mono_block; - size_t total_rows_in_mono_block = 0; }; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp index e8b1b303ebd..c018751f1fb 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -7,8 +7,8 @@ namespace DB { -JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, size_t indent_) - : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_, mono_block_), fields(header_.getNamesAndTypes()), indent(indent_) +JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_) + : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_), fields(header_.getNamesAndTypes()), indent(indent_) { for (auto & field : fields) { @@ -36,19 +36,14 @@ void JSONColumnsBlockOutputFormat::writeChunkEnd() void registerOutputFormatJSONColumns(FormatFactory & factory) { - for (const auto & [name, mono_block] : {std::make_pair("JSONColumns", false), std::make_pair("JSONColumnsMonoBlock", true)}) + factory.registerOutputFormat("JSONColumns", []( + 
WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) { - factory.registerOutputFormat(name, [mono_block = mono_block]( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams &, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, format_settings, mono_block); - }); - } - - factory.markOutputFormatSupportsParallelFormatting("JSONColumns"); + return std::make_shared(buf, sample, format_settings); + }); } } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h index c7ac009654a..afac6d9f223 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h @@ -5,19 +5,17 @@ namespace DB { -/* Format JSONColumns outputs each block of data in the next format: +/* Format JSONColumns outputs all data as a single block in the next format: * { * "name1": [value1, value2, value3, ...], * "name2": [value1, value2m value3, ...], * ... * } - * There is also JSONColumnsMonoBlock format that buffers up to output_format_json_columns_max_rows_to_buffer rows - * and outputs them as a single block. 
*/ class JSONColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat { public: - JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, size_t indent_ = 0); + JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_ = 0); String getName() const override { return "JSONColumnsBlockOutputFormat"; } diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index eed21e8f9c0..2b41f1d4a4d 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes } JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : JSONColumnsBlockOutputFormat(out_, header_, format_settings_, true, 1) + : JSONColumnsBlockOutputFormat(out_, header_, format_settings_, 1) { bool need_validate_utf8 = false; makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); @@ -31,6 +31,12 @@ void JSONColumnsWithMetadataBlockOutputFormat::writePrefix() writeJSONMetadata(fields, format_settings, *ostr); } +void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() +{ + rows = mono_chunk.getNumRows(); + JSONColumnsBaseBlockOutputFormat::writeSuffix(); +} + void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart() { writeJSONFieldDelimiter(*ostr, 2); @@ -84,7 +90,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() statistics = std::move(*outside_statistics); writeJSONAdditionalInfo( - total_rows_in_mono_block, + rows, statistics.rows_before_limit, statistics.applied_limit, statistics.watch, diff --git 
a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h index 92be568504e..f56a79bdf56 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h @@ -4,8 +4,7 @@ namespace DB { -/* Format JSONColumnsWithMetadata buffers up to output_format_json_columns_max_rows_to_buffer rows - * and outputs them in the next format: +/* Format JSONColumnsWithMetadata outputs all data as a single block in the next format: * { * "meta": * [ @@ -52,6 +51,7 @@ protected: void consumeExtremes(Chunk chunk) override; void writePrefix() override; + void writeSuffix() override; void finalizeImpl() override; void writeChunkStart() override; @@ -61,6 +61,7 @@ protected: Statistics statistics; std::unique_ptr validating_ostr; /// Validates UTF-8 sequences, replaces bad sequences with replacement character. + size_t rows; }; } diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp index 18bd33c3fb2..f748f619cb5 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -6,8 +6,8 @@ namespace DB { -JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_) - : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_, mono_block_), column_names(header_.getNames()) +JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_), column_names(header_.getNames()) { } @@ -29,19 +29,14 @@ void 
JSONCompactColumnsBlockOutputFormat::writeChunkEnd() void registerOutputFormatJSONCompactColumns(FormatFactory & factory) { - for (const auto & [name, mono_block] : {std::make_pair("JSONCompactColumns", false), std::make_pair("JSONCompactColumnsMonoBlock", true)}) + factory.registerOutputFormat("JSONCompactColumns", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) { - factory.registerOutputFormat(name, [mono_block = mono_block]( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams &, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, format_settings, mono_block); - }); - } - - factory.markOutputFormatSupportsParallelFormatting("JSONCompactColumns"); + return std::make_shared(buf, sample, format_settings); + }); } } diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h index 5051e7d93cc..a1f2079d297 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h @@ -4,20 +4,17 @@ namespace DB { -/* Format JSONCompactColumns outputs each block of data in the next format: +/* Format JSONCompactColumns outputs all data as a single block in the next format: * [ * [value1, value2, value3, ...], * [value1, value2m value3, ...], * ... * ] - * There is also JSONCompactColumnsMonoBlock format that buffers up to output_format_json_columns_max_rows_to_buffer rows - * and outputs them as a single block. */ class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat { public: - /// no_escapes - do not use ANSI escape sequences - to display in the browser, not in the console. 
- JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_); + JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "JSONCompactColumnsBlockOutputFormat"; } diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index b760de71e4d..9ba358a76e1 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -205,7 +205,7 @@ void PrettyCompactBlockOutputFormat::write(Chunk chunk, PortKind port_kind) return; } - mono_chunk.append(chunk, chunk.getNumRows()); + mono_chunk.append(chunk); return; } else diff --git a/tests/queries/0_stateless/02293_formats_json_columns.reference b/tests/queries/0_stateless/02293_formats_json_columns.reference index 6ceda30c840..da8d080ac05 100644 --- a/tests/queries/0_stateless/02293_formats_json_columns.reference +++ b/tests/queries/0_stateless/02293_formats_json_columns.reference @@ -1,28 +1,4 @@ JSONColumns -{ - "a": [0, 1], - "b": ["String", "String"], - "c": [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]]] -} -{ - "a": [2, 3], - "b": ["String", "String"], - "c": [[[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]]] -} -{ - "a": [4], - "b": ["String"], - "c": [[[[0],"String"],[[],"gnirtS"]]] -} -a Nullable(Float64) -b Nullable(String) -c Array(Tuple(Array(Nullable(Float64)), Nullable(String))) -0 String [([],'String'),([],'gnirtS')] -1 String [([0],'String'),([0],'gnirtS')] -2 String [([0,1],'String'),([0,1],'gnirtS')] -3 String [([],'String'),([0,1,2],'gnirtS')] -4 String [([0],'String'),([],'gnirtS')] -JSONColumnsMonoBlock { "a": [0, 1, 2, 3, 4], "b": ["String", "String", "String", "String", "String"], @@ -36,43 +12,7 @@ c 
Array(Tuple(Array(Nullable(Float64)), Nullable(String))) 2 String [([0,1],'String'),([0,1],'gnirtS')] 3 String [([],'String'),([0,1,2],'gnirtS')] 4 String [([0],'String'),([],'gnirtS')] -JSONColumnsMonoBlock 3 rows -{ - "a": [0, 1, 2], - "b": ["String", "String", "String"], - "c": [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]]] -} -a Nullable(Float64) -b Nullable(String) -c Array(Tuple(Array(Nullable(Float64)), Nullable(String))) -0 String [([],'String'),([],'gnirtS')] -1 String [([0],'String'),([0],'gnirtS')] -2 String [([0,1],'String'),([0,1],'gnirtS')] JSONCompactColumns -[ - [0, 1], - ["String", "String"], - [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]]] -] -[ - [2, 3], - ["String", "String"], - [[[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]]] -] -[ - [4], - ["String"], - [[[[0],"String"],[[],"gnirtS"]]] -] -c1 Nullable(Float64) -c2 Nullable(String) -c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String))) -0 String [([],'String'),([],'gnirtS')] -1 String [([0],'String'),([0],'gnirtS')] -2 String [([0,1],'String'),([0,1],'gnirtS')] -3 String [([],'String'),([0,1,2],'gnirtS')] -4 String [([0],'String'),([],'gnirtS')] -JSONCompactColumnsMonoBlock [ [0, 1, 2, 3, 4], ["String", "String", "String", "String", "String"], @@ -86,18 +26,6 @@ c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String))) 2 String [([0,1],'String'),([0,1],'gnirtS')] 3 String [([],'String'),([0,1,2],'gnirtS')] 4 String [([0],'String'),([],'gnirtS')] -JSONCompactColumnsMonoBlock 3 rows -[ - [0, 1, 2], - ["String", "String", "String"], - [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]]] -] -c1 Nullable(Float64) -c2 Nullable(String) -c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String))) -0 String [([],'String'),([],'gnirtS')] -1 String [([0],'String'),([0],'gnirtS')] -2 String [([0,1],'String'),([0,1],'gnirtS')] JSONColumnsWithMetadata { 
"meta": @@ -146,54 +74,6 @@ JSONColumnsWithMetadata "bytes_read": 20 } } -JSONColumnsWithMetadata 3 rows -{ - "meta": - [ - { - "name": "sum", - "type": "UInt64" - }, - { - "name": "avg", - "type": "Float64" - } - ], - - "data": - { - "sum": ["1", "2", "3", "4"], - "avg": [1, 2, 3, 2] - }, - - "totals": - { - "sum": "10", - "avg": 2 - }, - - "extremes": - { - "min": - { - "sum": "1", - "avg": 1 - }, - "max": - { - "sum": "4", - "avg": 3 - } - }, - - "rows": 4, - - "statistics": - { - "rows_read": 5, - "bytes_read": 20 - } -} b Nullable(Float64) a Nullable(Float64) c Nullable(Float64) diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index d416cf6da5a..8beac62dd6f 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -18,40 +18,14 @@ $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" $CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" -echo "JSONColumnsMonoBlock" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" - -echo "JSONColumnsMonoBlock 3 rows" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" - echo "JSONCompactColumns" $CLICKHOUSE_CLIENT -q "select * from 
test_02293 order by a format JSONCompactColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" $CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" -echo "JSONCompactColumnsMonoBlock" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" - -echo "JSONCompactColumnsMonoBlock 3 rows" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" -$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumnsMonoBlock settings output_format_json_columns_max_rows_to_buffer=3" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" - echo "JSONColumnsWithMetadata" $CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed" -echo "JSONColumnsWithMetadata 3 rows" -$CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata settings output_format_json_columns_max_rows_to_buffer=3" --extremes=1 | grep -v "elapsed" echo ' From 9d9dc0304da1c0bb0a3a16bfb429392a6215a128 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 9 May 2022 11:29:02 +0000 Subject: [PATCH 074/615] Fix tests --- .../00285_not_all_data_in_totals.reference | 4 ++-- .../00313_const_totals_extremes.reference | 12 ++++++------ 
.../0_stateless/02293_formats_json_columns.sh | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/00285_not_all_data_in_totals.reference b/tests/queries/0_stateless/00285_not_all_data_in_totals.reference index 961d8a34c09..065c39f5909 100644 --- a/tests/queries/0_stateless/00285_not_all_data_in_totals.reference +++ b/tests/queries/0_stateless/00285_not_all_data_in_totals.reference @@ -25,7 +25,7 @@ [0, "2"] ], - "totals": [0,"2000"], + "totals": [0, "2000"], "rows": 10, @@ -58,7 +58,7 @@ [0, "2"] ], - "totals": [0,"2000"], + "totals": [0, "2000"], "rows": 10, diff --git a/tests/queries/0_stateless/00313_const_totals_extremes.reference b/tests/queries/0_stateless/00313_const_totals_extremes.reference index f9084065989..fcb39b8080c 100644 --- a/tests/queries/0_stateless/00313_const_totals_extremes.reference +++ b/tests/queries/0_stateless/00313_const_totals_extremes.reference @@ -65,12 +65,12 @@ [1.23, "1"] ], - "totals": [1.23,"1"], + "totals": [1.23, "1"], "extremes": { - "min": [1.23,"1"], - "max": [1.23,"1"] + "min": [1.23, "1"], + "max": [1.23, "1"] }, "rows": 1 @@ -142,12 +142,12 @@ [1.1, "1"] ], - "totals": [1.1,"1"], + "totals": [1.1, "1"], "extremes": { - "min": [1.1,"1"], - "max": [1.1,"1"] + "min": [1.1, "1"], + "max": [1.1, "1"] }, "rows": 1 diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 8beac62dd6f..291908f26df 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 1e8d7ae749da11b4a6cb35c1230bacc4c7efaeb9 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 9 May 2022 11:29:40 +0000 Subject: [PATCH 075/615] Fix --- src/Processors/Formats/Impl/JSONRowOutputFormat.cpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 86ccd2ddb12..d0d50526a0d 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -47,7 +47,7 @@ void JSONRowOutputFormat::writeField(const IColumn & column, const ISerializatio void JSONRowOutputFormat::writeFieldDelimiter() { - writeJSONFieldDelimiter(out); + writeJSONFieldDelimiter(*ostr); } @@ -67,7 +67,7 @@ void JSONRowOutputFormat::writeRowEndDelimiter() void JSONRowOutputFormat::writeRowBetweenDelimiter() { - writeJSONFieldDelimiter(out); + writeJSONFieldDelimiter(*ostr); } From a988cfa27b22eabeb68e35e296cb3406894de284 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 9 May 2022 11:53:19 +0000 Subject: [PATCH 076/615] Small fixes --- src/Coordination/KeeperStorage.cpp | 37 +++++++++++++++++++----------- src/Coordination/NodesAccessor.h | 0 2 files changed, 24 insertions(+), 13 deletions(-) delete mode 100644 src/Coordination/NodesAccessor.h diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 2a123e6a352..1d996707a31 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -225,6 +225,8 @@ std::shared_ptr KeeperStorage::CurrentNodes::getNode(String const auto & committed_node = maybe_node_it->value; node = std::make_shared(); node->stat = committed_node.stat; + node->seq_num = committed_node.seq_num; + node->setData(committed_node.getData()); } applyDeltas( @@ -242,6 +244,11 @@ std::shared_ptr KeeperStorage::CurrentNodes::getNode(String assert(node); node = nullptr; }, + [&](const UpdateNodeDelta & update_delta) + { + assert(node); + update_delta.update_fn(*node); + }, [&](auto && /*delta*/) {}, }); @@ -504,7 +511,7 @@ struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageReques Coordination::ZooKeeperResponsePtr process(KeeperStorage & /* 
storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { - return {zk_request->makeResponse(), {}}; + return zk_request->makeResponse(); } }; @@ -570,7 +577,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; auto parent_path = parentPath(request.path); - auto parent_node = storage.current_nodes.getNode(std::string{parent_path}); + auto parent_node = storage.current_nodes.getNode(parent_path); if (parent_node == nullptr) return {{zxid, Coordination::Error::ZNONODE}}; @@ -606,6 +613,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr stat.ctime = time; stat.mtime = time; stat.numChildren = 0; + stat.version = 0; + stat.aversion = 0; stat.dataLength = request.data.length(); stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; @@ -1318,18 +1327,22 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro for (size_t i = 0; i < concrete_requests.size(); ++i) { auto new_deltas = concrete_requests[i]->preprocess(storage, zxid, session_id, time); - if (auto * error = std::get_if(&new_deltas.back().operation)) + + if (!new_deltas.empty()) { - std::erase_if(saved_deltas, [zxid](const auto & delta) { return delta.zxid == zxid; }); - - response_errors.push_back(error->error); - - for (size_t j = i + 1; j < concrete_requests.size(); ++j) + if (auto * error = std::get_if(&new_deltas.back().operation)) { - response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); - } + std::erase_if(saved_deltas, [zxid](const auto & delta) { return delta.zxid == zxid; }); - return {{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + response_errors.push_back(error->error); + + for (size_t j = i + 1; j < concrete_requests.size(); ++j) + { + response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); + } + + return {{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + } } 
new_deltas.emplace_back(zxid, KeeperStorage::SubDeltaEnd{}); response_errors.push_back(Coordination::Error::ZOK); @@ -1616,8 +1629,6 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { - ephemerals.erase(session_id); - commit(zxid, session_id); for (const auto & delta : current_nodes.deltas) diff --git a/src/Coordination/NodesAccessor.h b/src/Coordination/NodesAccessor.h deleted file mode 100644 index e69de29bb2d..00000000000 From 5c72da83d38e0f99a514f9abcbadd2ee27c94092 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Mon, 9 May 2022 17:19:53 +0300 Subject: [PATCH 077/615] Fix typos --- src/Disks/IDisk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 548c1b09280..9c51085966c 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -308,10 +308,10 @@ public: /// other alive harlinks will not be removed. virtual UInt32 getRefCount(const String &) const { return 0; } - /// Revision is an incremental counter of disk operaion. + /// Revision is an incremental counter of disk operation. /// Revision currently exisis only in DiskS3. /// It is used to save current state during backup and restore that state from backup. - /// This methos sets current disk revision if it lower than required. + /// This method sets current disk revision if it lower than required. virtual void syncRevision(UInt64) {} /// Return current disk revision. 
virtual UInt64 getRevision() const { return 0; } From cbada6fe03d7cd8d43e0e393cd7d39c121fb6041 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 9 May 2022 15:42:59 +0000 Subject: [PATCH 078/615] Fix Illegal column Nothing while using arrayMap --- src/Functions/IFunction.cpp | 14 ++++++++++++++ .../02294_nothing_arguments_in_functions.reference | 6 ++++++ .../02294_nothing_arguments_in_functions.sql | 6 ++++++ 3 files changed, 26 insertions(+) create mode 100644 tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference create mode 100644 tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 19638c78daf..95dafcbb575 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -275,6 +275,11 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const { + /// Result type Nothing means that we don't need to execute function at all. + /// Example: select arrayMap(x -> 2 * x, []); + if (isNothing(result_type)) + return result_type->createColumn(); + if (useDefaultImplementationForSparseColumns()) { size_t num_sparse_columns = 0; @@ -430,6 +435,15 @@ DataTypePtr IFunctionOverloadResolver::getReturnTypeWithoutLowCardinality(const } } + /// If one of the arguments is Nothing, then we won't really execute + /// the function and the result type should be also Nothing. 
+ /// Example: select arrayMap(x -> 2 * x, []); + for (const auto & arg : arguments) + { + if (isNothing(arg.type)) + return std::make_shared(); + } + return getReturnTypeImpl(arguments); } diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference new file mode 100644 index 00000000000..954015207ad --- /dev/null +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference @@ -0,0 +1,6 @@ +[] +Array(Nothing) +[] +Array(Nothing) +[] +Array(Nothing) diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql new file mode 100644 index 00000000000..3df2577e465 --- /dev/null +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -0,0 +1,6 @@ +select arrayMap(x -> 2 * x, []); +select toTypeName(arrayMap(x -> 2 * x, [])); +select arrayMap((x, y) -> x + y, [], []); +select toTypeName(arrayMap((x, y) -> x + y, [], [])); +select arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)')); +select toTypeName(arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)'))); From 8c3c80f84cc82c36ad63d17356a8474403242cb0 Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 10 May 2022 14:54:36 +0800 Subject: [PATCH 079/615] rename parent table to source table --- src/Storages/WindowView/StorageWindowView.cpp | 10 +++++----- src/Storages/WindowView/StorageWindowView.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 1321440f140..38b8675b69d 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -510,8 +510,8 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) auto creator = [&](const StorageID & blocks_id_global) { - auto parent_table_metadata = 
getParentTable()->getInMemoryMetadataPtr(); - auto required_columns = parent_table_metadata->getColumns(); + auto source_table_metadata = getSourceTable()->getInMemoryMetadataPtr(); + auto required_columns = source_table_metadata->getColumns(); required_columns.add(ColumnDescription("____timestamp", std::make_shared())); return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::WithMergeableState); }; @@ -1361,8 +1361,8 @@ void StorageWindowView::writeIntoWindowView( auto creator = [&](const StorageID & blocks_id_global) { - auto parent_metadata = window_view.getParentTable()->getInMemoryMetadataPtr(); - auto required_columns = parent_metadata->getColumns(); + auto source_metadata = window_view.getSourceTable()->getInMemoryMetadataPtr(); + auto required_columns = source_metadata->getColumns(); required_columns.add(ColumnDescription("____timestamp", std::make_shared())); return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::FetchColumns); }; @@ -1509,7 +1509,7 @@ Block & StorageWindowView::getHeader() const return sample_block; } -StoragePtr StorageWindowView::getParentTable() const +StoragePtr StorageWindowView::getSourceTable() const { return DatabaseCatalog::instance().getTable(select_table_id, getContext()); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f61dfc334f0..99c931ac4a8 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -236,7 +236,7 @@ private: ASTPtr getFinalQuery() const { return final_query->clone(); } ASTPtr getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const; - StoragePtr getParentTable() const; + StoragePtr getSourceTable() const; StoragePtr getInnerTable() const; StoragePtr getTargetTable() const; From 7eeb463fdb7eb60f27fad4a299488139fc287a7e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 May 2022 
07:00:38 +0000 Subject: [PATCH 080/615] Fix local --- src/Coordination/KeeperStorage.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 1d996707a31..176e45d4cd4 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -702,7 +702,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - if constexpr (local) + if constexpr (!local) { if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { @@ -851,7 +851,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperExistsResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); - if constexpr (local) + if constexpr (!local) { if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { @@ -1005,7 +1005,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc Coordination::ZooKeeperListResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); - if constexpr (local) + if constexpr (!local) { if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { @@ -1093,7 +1093,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperCheckResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); - if constexpr (local) + if constexpr (!local) { if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { @@ -1233,7 +1233,7 @@ struct 
KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); - if constexpr (local) + if constexpr (!local) { if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { From 5d0a5d34c848286453c02ad1eaf573effb087a74 Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 10 May 2022 15:06:58 +0800 Subject: [PATCH 081/615] removing function populate in windowview --- src/Interpreters/InterpreterCreateQuery.cpp | 18 ++- .../Transforms/buildPushingToViewsChain.cpp | 4 +- src/Storages/WindowView/StorageWindowView.cpp | 125 ++++++++++-------- src/Storages/WindowView/StorageWindowView.h | 10 +- src/Storages/WindowView/WindowViewSource.h | 4 +- 5 files changed, 86 insertions(+), 75 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c8784522207..27cc46de865 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1437,19 +1437,17 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) /// If the query is a CREATE SELECT, insert the data into the table. 
if (create.select && !create.attach && !create.is_ordinary_view && !create.is_live_view - && (!create.is_materialized_view || create.is_populate)) + && (!(create.is_materialized_view || create.is_window_view) || create.is_populate)) { - if (create.is_window_view) - { - auto table = DatabaseCatalog::instance().getTable({create.getDatabase(), create.getTable(), create.uuid}, getContext()); - if (auto * window_view = typeid_cast(table.get())) - return window_view->populate(); - return {}; - } - auto insert = std::make_shared(); insert->table_id = {create.getDatabase(), create.getTable(), create.uuid}; - insert->select = create.select->clone(); + if (create.is_window_view) + { + auto table = DatabaseCatalog::instance().getTable(insert->table_id, getContext()); + insert->select = typeid_cast(table.get())->getSourceTableSelectQuery(); + } + else + insert->select = create.select->clone(); return InterpreterInsertQuery(insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8abbb63d418..88a36a9a235 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -314,7 +314,7 @@ Chain buildPushingToViewsChain( runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log out = buildPushingToViewsChain( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, view_thread_status, view_counter_ms, storage_header); + dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, view_thread_status, view_counter_ms); } else out = buildPushingToViewsChain( @@ -392,7 +392,7 @@ Chain buildPushingToViewsChain( } else if (auto * window_view = dynamic_cast(storage.get())) { - auto sink = std::make_shared(live_view_header, 
*window_view, storage, context); + auto sink = std::make_shared(window_view->getHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 38b8675b69d..ade544d191f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -606,6 +606,54 @@ inline void StorageWindowView::fire(UInt32 watermark) } } +ASTPtr StorageWindowView::getSourceTableSelectQuery() +{ + auto select_query_ = select_query->clone(); + auto & modified_select = select_query_->as(); + + if (hasJoin(modified_select)) + { + auto analyzer_res = TreeRewriterResult({}); + removeJoin(modified_select, analyzer_res, getContext()); + } + else + { + modified_select.setExpression(ASTSelectQuery::Expression::HAVING, {}); + modified_select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + } + + auto select_list = std::make_shared(); + for (const auto & column_name : source_header.getNames()) + select_list->children.emplace_back(std::make_shared(column_name)); + modified_select.setExpression(ASTSelectQuery::Expression::SELECT, select_list); + + if (!is_time_column_func_now) + { + auto select_query_ = select_query->clone(); + DropTableIdentifierMatcher::Data drop_table_identifier_data; + DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); + drop_table_identifier_visitor.visit(select_query_); + + FetchQueryInfoMatcher::Data query_info_data; + FetchQueryInfoMatcher::Visitor(query_info_data).visit(select_query_); + + auto order_by = std::make_shared(); + auto order_by_elem = std::make_shared(); + order_by_elem->children.push_back(std::make_shared(query_info_data.timestamp_column_name)); + order_by_elem->direction = 1; + order_by->children.push_back(order_by_elem); + 
modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_by)); + } + else + modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {}); + + const auto select_with_union_query = std::make_shared(); + select_with_union_query->list_of_selects = std::make_shared(); + select_with_union_query->list_of_selects->children.push_back(select_query_); + + return select_with_union_query; +} + std::shared_ptr StorageWindowView::getInnerTableCreateQuery( const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name) { @@ -1002,16 +1050,11 @@ StorageWindowView::StorageWindowView( const StorageID & table_id_, ContextPtr context_, const ASTCreateQuery & query, - const ColumnsDescription & columns_, bool attach_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) { - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns_); - setInMemoryMetadata(storage_metadata); - if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -1021,6 +1064,13 @@ StorageWindowView::StorageWindowView( "UNION is not supported for {}", getName()); select_query = query.select->list_of_selects->children.at(0)->clone(); + + source_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)) + .getSampleBlock(); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription(source_header.getNamesAndTypesList())); + setInMemoryMetadata(storage_metadata); + String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; auto select_query_tmp = select_query->clone(); @@ -1205,52 +1255,6 @@ private: ContextPtr context; }; -BlockIO StorageWindowView::populate() -{ - QueryPipelineBuilder pipeline; - - 
InterpreterSelectQuery interpreter_fetch{select_query, getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)}; - pipeline = interpreter_fetch.buildQueryPipeline(); - - if (!is_time_column_func_now) - { - SortDescription order_descr; - order_descr.emplace_back(timestamp_column_name); - pipeline.addSimpleTransform( - [&](const Block & header) - { - return std::make_shared( - header, - order_descr, - getContext()->getSettingsRef().max_block_size, - 0 /*LIMIT*/, - getContext()->getSettingsRef().max_bytes_before_remerge_sort, - getContext()->getSettingsRef().remerge_sort_lowered_memory_bytes_ratio, - getContext()->getSettingsRef().max_bytes_before_external_sort, - getContext()->getTemporaryVolume(), - getContext()->getSettingsRef().min_free_disk_space_for_temporary_data); - }); - } - - auto sink = std::make_shared(interpreter_fetch.getSampleBlock(), *this, nullptr, getContext()); - - BlockIO res; - - pipeline.addChain(Chain(std::move(sink))); - pipeline.setMaxThreads(1); - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); - - res.pipeline.addStorageHolder(shared_from_this()); - res.pipeline.addStorageHolder(getInnerTable()); - - return res; -} - void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { @@ -1492,21 +1496,26 @@ void StorageWindowView::dropInnerTableIfAny(bool no_delay, ContextPtr local_cont } } -Block & StorageWindowView::getHeader() const +Block StorageWindowView::getHeader() const +{ + return source_header; +} + +Block StorageWindowView::getTargetHeader() const { std::lock_guard lock(sample_block_lock); - if (!sample_block) + if (!target_header) { - sample_block = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::Complete)) + target_header = 
InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::Complete)) .getSampleBlock(); /// convert all columns to full columns /// in case some of them are constant - for (size_t i = 0; i < sample_block.columns(); ++i) + for (size_t i = 0; i < target_header.columns(); ++i) { - sample_block.safeGetByPosition(i).column = sample_block.safeGetByPosition(i).column->convertToFullColumnIfConst(); + target_header.safeGetByPosition(i).column = target_header.safeGetByPosition(i).column->convertToFullColumnIfConst(); } } - return sample_block; + return target_header; } StoragePtr StorageWindowView::getSourceTable() const @@ -1578,7 +1587,7 @@ void registerStorageWindowView(StorageFactory & factory) "Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')", ErrorCodes::SUPPORT_IS_DISABLED); - return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.attach); + return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.attach); }); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 99c931ac4a8..a95ea2aa715 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -108,7 +108,6 @@ public: const StorageID & table_id_, ContextPtr context_, const ASTCreateQuery & query, - const ColumnsDescription & columns, bool attach_); String getName() const override { return "WindowView"; } @@ -153,6 +152,10 @@ public: ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } + ASTPtr getSourceTableSelectQuery(); + + Block getHeader() const; + private: Poco::Logger * log; @@ -168,7 +171,8 @@ private: bool is_tumble; // false if is hop std::atomic shutdown_called{false}; bool has_inner_table{true}; - mutable Block sample_block; + mutable Block source_header; + mutable Block target_header; UInt64 clean_interval_ms; const DateLUTImpl * 
time_zone = nullptr; UInt32 max_timestamp = 0; @@ -240,6 +244,6 @@ private: StoragePtr getInnerTable() const; StoragePtr getTargetTable() const; - Block & getHeader() const; + Block getTargetHeader() const; }; } diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index a726cdc8712..ae5eecfac3c 100644 --- a/src/Storages/WindowView/WindowViewSource.h +++ b/src/Storages/WindowView/WindowViewSource.h @@ -20,7 +20,7 @@ public: : SourceWithProgress( is_events_ ? Block( {ColumnWithTypeAndName(ColumnUInt32::create(), std::make_shared(window_view_timezone_), "watermark")}) - : storage_->getHeader()) + : storage_->getTargetHeader()) , storage(storage_) , is_events(is_events_) , window_view_timezone(window_view_timezone_) @@ -32,7 +32,7 @@ public: header.insert( ColumnWithTypeAndName(ColumnUInt32::create(), std::make_shared(window_view_timezone_), "watermark")); else - header = storage->getHeader(); + header = storage->getTargetHeader(); } String getName() const override { return "WindowViewSource"; } From bb6b1883fc077926da18d3da59edda9a465be56f Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 10 May 2022 15:07:23 +0800 Subject: [PATCH 082/615] add tests for windowview populate --- .../0_stateless/01048_window_view_parser.reference | 4 ++++ tests/queries/0_stateless/01048_window_view_parser.sql | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/tests/queries/0_stateless/01048_window_view_parser.reference b/tests/queries/0_stateless/01048_window_view_parser.reference index 947b68c3a89..b708ecb656b 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.reference +++ b/tests/queries/0_stateless/01048_window_view_parser.reference @@ -15,6 +15,8 @@ CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecon CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = 
AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +||---POPULATE JOIN--- +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nSETTINGS index_granularity = 8192 @@ -32,3 +34,5 @@ CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecon CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +||---POPULATE JOIN--- +CREATE TABLE 
test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index e17352205e3..c048a5d9ec0 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -54,6 +54,11 @@ DROP TABLE IF EXISTS test_01048.wv; CREATE WINDOW VIEW test_01048.wv AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; +SELECT '||---POPULATE JOIN---'; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; + SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; @@ -99,6 +104,11 @@ DROP TABLE IF EXISTS test_01048.wv; CREATE WINDOW VIEW test_01048.wv AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; +SELECT '||---POPULATE JOIN---'; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid 
FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; + DROP TABLE test_01048.wv; DROP TABLE test_01048.mt; DROP TABLE test_01048.mt_2; From f8d0aa4bc18d85d520408a10d3b74ebd032899e6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 May 2022 09:53:15 +0000 Subject: [PATCH 083/615] Fix update delta --- src/Coordination/KeeperServer.cpp | 1 - src/Coordination/KeeperStorage.cpp | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 22a3a4624c6..1f089ba2cb7 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -390,7 +390,6 @@ nuraft::ptr getZooKeeperLogEntry(int64_t session_id, int64_t tim DB::writeIntBinary(session_id, buf); request->write(buf); DB::writeIntBinary(time, buf); - DB::writeIntBinary(static_cast(0), buf); return buf.getBuffer(); } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 176e45d4cd4..b31b121caf1 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -324,7 +324,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i bool finish_subdelta = false; auto result = std::visit( - [&, &path = delta.path](const DeltaType & operation) -> Coordination::Error + [&, &path = delta.path](DeltaType & operation) -> Coordination::Error { if constexpr (std::same_as) { @@ -925,13 +925,13 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce request.path, zxid, KeeperStorage::UpdateNodeDelta{ - [zxid, data = request.data, time](KeeperStorage::Node & value) mutable + [zxid, data = request.data, time](KeeperStorage::Node & value) { value.stat.version++; value.stat.mzxid = zxid; value.stat.mtime = time; value.stat.dataLength = 
data.length(); - value.setData(std::move(data)); + value.setData(data); }, request.version}); From b3340caea43b8460b44cc59fe6bad48c9f1ff1e4 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Mon, 9 May 2022 10:42:52 +0900 Subject: [PATCH 084/615] fixing hashid function registration when hashid is disabled --- src/Core/config_core.h.in | 1 + src/Functions/FunctionHashID.cpp | 3 ++- src/Functions/registerFunctions.cpp | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 3fc2503aaa5..6d296c2dbff 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -21,3 +21,4 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_REPLXX #cmakedefine01 USE_JEMALLOC +#cmakedefine01 USE_HASHIDSXX diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp index 5983af73391..8ef81c5cfd1 100644 --- a/src/Functions/FunctionHashID.cpp +++ b/src/Functions/FunctionHashID.cpp @@ -1,9 +1,9 @@ #include "FunctionHashID.h" -#if USE_HASHIDSXX #include +#if USE_HASHIDSXX namespace DB { @@ -12,6 +12,7 @@ void registerFunctionHashID(FunctionFactory & factory) { factory.registerFunction(); } + } #endif diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 2472b78cbcd..d70f019b3e1 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -91,7 +91,6 @@ void registerFunctions() registerFunctionsExternalDictionaries(factory); registerFunctionsExternalModels(factory); registerFunctionsFormatting(factory); - registerFunctionHashID(factory); registerFunctionsHashing(factory); registerFunctionsHigherOrder(factory); registerFunctionsLogical(factory); @@ -133,6 +132,10 @@ void registerFunctions() #endif registerFunctionTid(factory); registerFunctionLogTrace(factory); + +#if USE_HASHIDSXX + registerFunctionHashID(factory); +#endif } } From 02319f92a310c52338e5b5dc72264c45dfb5b888 Mon Sep 17 00:00:00 2001 From: Antonio Andelic 
Date: Tue, 10 May 2022 12:53:18 +0000 Subject: [PATCH 085/615] Define auth for sessions with preprocess --- src/Coordination/KeeperStorage.cpp | 162 ++++++++++++----------------- src/Coordination/KeeperStorage.h | 41 ++++++-- 2 files changed, 98 insertions(+), 105 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index b31b121caf1..53a5e5f843c 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -55,33 +55,6 @@ String generateDigest(const String & userdata) return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } -bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) -{ - if (node_acls.empty()) - return true; - - for (const auto & session_auth : session_auths) - if (session_auth.scheme == "super") - return true; - - for (const auto & node_acl : node_acls) - { - if (node_acl.permissions & permission) - { - if (node_acl.scheme == "world" && node_acl.id == "anyone") - return true; - - for (const auto & session_auth : session_auths) - { - if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) - return true; - } - } - } - - return false; -} - bool fixupACL( const std::vector & request_acls, const std::vector & current_ids, @@ -384,6 +357,11 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i finish_subdelta = true; return Coordination::Error::ZOK; } + else if constexpr (std::same_as) + { + session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); + return Coordination::Error::ZOK; + } else { // shouldn't be called in any process functions @@ -531,7 +509,7 @@ struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProc namespace { - Coordination::ACLs getACLs(KeeperStorage & storage, StringRef path, bool is_local) + Coordination::ACLs getNodeACLs(KeeperStorage & storage, StringRef path, bool is_local) { if (is_local) { @@ 
-546,6 +524,33 @@ namespace } } +bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) +{ + const auto node_acls = getNodeACLs(*this, path, is_local); + if (node_acls.empty()) + return true; + + if (current_nodes.hasAcl(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) + return true; + + for (const auto & node_acl : node_acls) + { + if (node_acl.permissions & permission) + { + if (node_acl.scheme == "world" && node_acl.id == "anyone") + return true; + + if (current_nodes.hasAcl( + session_id, + is_local, + [&](const auto & auth_id) { return auth_id.scheme == node_acl.scheme && auth_id.id == node_acl.id; })) + return true; + } + } + + return false; +} + struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { @@ -560,14 +565,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { auto path = zk_request->getPath(); - auto parent_path = parentPath(path); - - const auto node_acls = getACLs(storage, parent_path, is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Create, node_acls, session_auths); + return storage.checkACL(parentPath(path), Coordination::ACL::Create, session_id, is_local); } std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override @@ -674,12 +672,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, zk_request->getPath(), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Read, node_acls, 
session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -751,12 +744,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Delete, node_acls, session_auths); + return storage.checkACL(parentPath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -898,12 +886,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Write, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Write, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -977,12 +960,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Read, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), 
Coordination::ACL::Read, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -1062,12 +1040,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Read, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -1144,12 +1117,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Admin, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -1205,14 +1173,9 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - const auto node_acls = getACLs(storage, parentPath(zk_request->getPath()), is_local); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - /// LOL, GetACL require more permissions, then SetACL... 
- return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); } + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::vector @@ -1450,35 +1413,39 @@ struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); - auto & sessions_and_auth = storage.session_and_auth; if (auth_request.scheme != "digest" || std::count(auth_request.data.begin(), auth_request.data.end(), ':') != 1) + return {{zxid, Coordination::Error::ZAUTHFAILED}}; + + std::vector new_deltas; + auto digest = generateDigest(auth_request.data); + if (digest == storage.superdigest) { - auth_response.error = Coordination::Error::ZAUTHFAILED; + KeeperStorage::AuthID auth{"super", ""}; + new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(auth)}); } else { - auto digest = generateDigest(auth_request.data); - if (digest == storage.superdigest) - { - KeeperStorage::AuthID auth{"super", ""}; - sessions_and_auth[session_id].emplace_back(auth); - } - else - { - KeeperStorage::AuthID auth{auth_request.scheme, digest}; - auto & session_ids = sessions_and_auth[session_id]; - if (std::find(session_ids.begin(), session_ids.end(), auth) == 
session_ids.end()) - session_ids.emplace_back(auth); - } + KeeperStorage::AuthID new_auth{auth_request.scheme, digest}; + if (storage.current_nodes.hasAcl(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) + new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); } + return new_deltas; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); + + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + auth_response.error = result; + return response_ptr; } }; @@ -1566,7 +1533,6 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() void KeeperStorage::preprocessRequest( const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl) { - current_nodes.current_zxid = new_last_zxid; KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 2fa87328dfd..21cec45dbc1 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -140,8 +140,14 @@ public: { }; + struct AddAuthDelta + { + int64_t session_id; + AuthID auth_id; + }; + using Operation - = std::variant; + = std::variant; struct Delta { @@ -149,9 +155,7 @@ public: Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { } - Delta(int64_t zxid_, SubDeltaEnd subdelta) : Delta("", zxid_, subdelta) { } - - Delta(int64_t zxid_, FailedMultiDelta failed_multi) : Delta("", zxid_, 
failed_multi) { } + Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { } String path; int64_t zxid; @@ -167,19 +171,40 @@ public: { for (const auto & delta : deltas) { - if (delta.path == path) + if (path.empty() || delta.path == path) std::visit(visitor, delta.operation); } } + template + bool hasAcl(int64_t session_id, bool is_local, Predicate predicate) + { + for (const auto & session_auth : storage.session_and_auth[session_id]) + { + if (predicate(session_auth)) + return true; + } + + if (is_local) + return false; + + + for (const auto & delta : deltas) + { + if (auto * auth_delta = std::get_if(&delta.operation); + auth_delta && auth_delta->session_id == session_id && predicate(auth_delta->auth_id)) + return true; + } + + return false; + } + std::shared_ptr getNode(StringRef path); bool hasNode(StringRef path) const; Coordination::ACLs getACLs(StringRef path) const; - std::unordered_map> node_to_deltas; std::deque deltas; KeeperStorage & storage; - int64_t current_zxid{0}; }; CurrentNodes current_nodes{*this}; @@ -196,6 +221,8 @@ public: int64_t session_id); bool removeNode(const std::string & path, int32_t version); + bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local); + /// Mapping session_id -> set of ephemeral nodes paths Ephemerals ephemerals; /// Mapping session_id -> set of watched nodes paths From 88007809c2bf5285a436d71a0e4a33cb8c040242 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 May 2022 13:04:35 +0000 Subject: [PATCH 086/615] Add rollback support --- src/Coordination/KeeperStateMachine.cpp | 6 ++++++ src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStorage.cpp | 7 +++++++ src/Coordination/KeeperStorage.h | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 9b2c8947d95..fa3a5195226 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ 
b/src/Coordination/KeeperStateMachine.cpp @@ -196,6 +196,12 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr cluster_config = ClusterConfig::deserialize(*tmp); } +void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & /*data*/) +{ + std::lock_guard lock(storage_and_responses_lock); + storage->rollbackRequest(log_idx); +} + nuraft::ptr KeeperStateMachine::last_snapshot() { /// Just return the latest snapshot. diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index a9fdfd7fac2..32dadab6570 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -38,7 +38,7 @@ public: void commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) override; /// NOLINT /// Currently not supported - void rollback(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override {} + void rollback(uint64_t log_idx, nuraft::buffer & data) override; uint64_t last_commit_index() override { return last_committed_idx; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 53a5e5f843c..b6a52781d4b 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1690,6 +1690,13 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( return results; } +void KeeperStorage::rollbackRequest(int64_t rollback_zxid) +{ + // we can only rollback the last zxid (if there is any) + // if there is a delta with a larger zxid, we have invalid state + assert(current_nodes.deltas.empty() || current_nodes.deltas.back().zxid <= rollback_zxid); + std::erase_if(current_nodes.deltas, [rollback_zxid](const auto & delta) { return delta.zxid == rollback_zxid; }); +} void KeeperStorage::clearDeadWatches(int64_t session_id) { diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 21cec45dbc1..8834fd38c5b 100644 --- a/src/Coordination/KeeperStorage.h +++ 
b/src/Coordination/KeeperStorage.h @@ -280,6 +280,7 @@ public: bool is_local = false); void preprocessRequest( const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl = true); + void rollbackRequest(int64_t rollback_zxid); void finalize(); From 6d28b226878f935505edef2606c860da3b58bc9b Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Tue, 10 May 2022 16:14:59 +0300 Subject: [PATCH 087/615] Fix jemalloc compatibility with LLVM libunwind jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. The latter is identified by `JEMALLOC_PROF_LIBGCC` and provides `_Unwind_Backtrace` method instead of `unw_backtrace`. At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). While this PR does not allow complete remove of the patch (as ClickHouse itself uses unw_backtrace directly), it definitely sorts the things out. 
--- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index c038e3d7aea..6f16ae2dc38 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -170,7 +170,7 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) if (USE_UNWIND) - target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) + target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) target_link_libraries (_jemalloc PRIVATE unwind) endif () From 8610d741774dcc3e635951e47f288c6395f02a63 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 May 2022 13:31:39 +0000 Subject: [PATCH 088/615] init cversion --- src/Coordination/KeeperStorage.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index b6a52781d4b..72ef6b51b5f 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -410,7 +410,6 @@ bool KeeperStorage::createNode( created_node.stat = stat; created_node.setData(std::move(data)); created_node.is_sequental = is_sequental; - container.insert(path, created_node); auto [map_key, _] = container.insert(path, created_node); /// Take child path from key owned by map. auto child_path = getBaseName(map_key->getKey()); @@ -613,11 +612,12 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr stat.numChildren = 0; stat.version = 0; stat.aversion = 0; + stat.cversion = 0; stat.dataLength = request.data.length(); stat.ephemeralOwner = request.is_ephemeral ? 
session_id : 0; new_deltas.emplace_back( - path_created, + std::move(path_created), zxid, KeeperStorage::CreateNodeDelta{stat, request.is_ephemeral, request.is_sequential, std::move(node_acls), request.data}); From 79080ff0cd45fe026b8622dafa8ec0e3f2264ef3 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 May 2022 13:42:16 +0000 Subject: [PATCH 089/615] Fix style --- src/Coordination/KeeperStorage.cpp | 20 ++++++++++++++------ src/Coordination/KeeperStorage.h | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 72ef6b51b5f..63bb258a604 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -529,7 +529,7 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session if (node_acls.empty()) return true; - if (current_nodes.hasAcl(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) + if (current_nodes.hasACL(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) return true; for (const auto & node_acl : node_acls) @@ -539,7 +539,7 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session if (node_acl.scheme == "world" && node_acl.id == "anyone") return true; - if (current_nodes.hasAcl( + if (current_nodes.hasACL( session_id, is_local, [&](const auto & auth_id) { return auth_id.scheme == node_acl.scheme && auth_id.id == node_acl.id; })) @@ -918,9 +918,17 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce }, request.version}); - new_deltas.emplace_back(parentPath(request.path).toString(), zxid, KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) { - parent.stat.cversion++; - }}); + new_deltas.emplace_back( + parentPath(request.path).toString(), + zxid, + KeeperStorage::UpdateNodeDelta + { + [](KeeperStorage::Node & parent) + { + parent.stat.cversion++; + } + } + ); return 
new_deltas; } @@ -1431,7 +1439,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc else { KeeperStorage::AuthID new_auth{auth_request.scheme, digest}; - if (storage.current_nodes.hasAcl(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) + if (storage.current_nodes.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 8834fd38c5b..6a9239f4cee 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -177,7 +177,7 @@ public: } template - bool hasAcl(int64_t session_id, bool is_local, Predicate predicate) + bool hasACL(int64_t session_id, bool is_local, Predicate predicate) { for (const auto & session_auth : storage.session_and_auth[session_id]) { From ea0362b3a3f0f1299dcd18d87b687975980828b7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 10 May 2022 16:20:38 +0000 Subject: [PATCH 090/615] Fix tests --- .../Formats/Impl/CHColumnToArrowColumn.cpp | 1 - .../Impl/JSONColumnsBaseBlockInputFormat.cpp | 7 ++ .../Impl/JSONColumnsBaseBlockInputFormat.h | 2 + .../00378_json_quote_64bit_integers.reference | 12 +-- tests/queries/0_stateless/out | 87 +++++++++++++++++++ 5 files changed, 102 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/out diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index f4f82e81234..043e4f1e724 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -308,7 +308,6 @@ namespace DB } status = builder->AppendIndices(indexes.data(), indexes.size(), arrow_null_bytemap_raw_ptr); - std::cerr << assert_cast(builder->type().get())->index_type()->name() << "\n"; checkStatus(status, 
column->getName(), format_name); } diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp index f7cd3c5ec6d..1d0bac914e1 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp @@ -103,11 +103,18 @@ size_t JSONColumnsBaseBlockInputFormat::readColumn( return column.size(); } +void JSONColumnsBaseBlockInputFormat::setReadBuffer(ReadBuffer & in_) +{ + reader->setReadBuffer(in_); + IInputFormat::setReadBuffer(in_); +} + Chunk JSONColumnsBaseBlockInputFormat::generate() { MutableColumns columns = getPort().getHeader().cloneEmptyColumns(); block_missing_values.clear(); + skipWhitespaceIfAny(*in); if (in->eof()) return {}; diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h index 8676b5c4ad3..fea8b03b809 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h @@ -48,6 +48,8 @@ public: String getName() const override { return "JSONColumnsBaseBlockInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } protected: diff --git a/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference b/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference index 49c937e09df..5174c13a9e0 100644 --- a/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference +++ b/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference @@ -119,12 +119,12 @@ ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]] ], - "totals": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]], + "totals": ["0", "0", "9223372036854775807", 
"-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]], "extremes": { - "min": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]], - "max": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]] + "min": ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]], + "max": ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]] }, "rows": 1 @@ -251,12 +251,12 @@ [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]] ], - "totals": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]], + "totals": [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]], "extremes": { - "min": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]], - "max": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]] + "min": [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]], + "max": [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]] }, "rows": 1 diff --git a/tests/queries/0_stateless/out b/tests/queries/0_stateless/out new file mode 100644 index 00000000000..9bb2afe7104 --- /dev/null +++ b/tests/queries/0_stateless/out @@ -0,0 +1,87 @@ +Arrow +b'ARROW1\x00\x00\xff\xff\xff\xff\x08\x01\x00\x00\x10\x00\x00\x00\x00\x00\n\x00\x0c\x00\x06\x00\x05\x00\x08\x00\n\x00\x00\x00\x00\x01\x04\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x03\x00\x00\x00\xa8\x00\x00\x00l\x00\x00\x00\x04\x00\x00\x00t\xff\xff\xff\x00\x00\x00\x0c\x14\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00 
\x00\x00\x00\x03\x00\x00\x00arr\x00\xa0\xff\xff\xff\x10\x00\x14\x00\x08\x00\x06\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x01\x02\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00item\x00\x00\x00\x00\x9e\xff\xff\xff@\x00\x00\x00\xd8\xff\xff\xff\x00\x00\x00\x04\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00s\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x10\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00id\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00@\x00\x00\x00\xff\xff\xff\xff(\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x04\x00\x18\x00\x00\x00(\x02\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x18\x00\x0c\x00\x04\x00\x08\x00\n\x00\x00\x00\xac\x00\x00\x00\x10\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00h\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x0
0\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00\x08\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x000123456789\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x06\x00\x00\x00\n\x00\x00\x00\x0f\x00\x00\x00\x15\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x0
0\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x04\x00<\x00\x00\x00(\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x18\x01\x00\x00\x00\x00\x00\x000\x01\x00\x00\x00\x00\x00\x00(\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x03\x00\x00\x00\xa8\x00\x00\x00l\x00\x00\x00\x04\x00\x00\x00t\xff\xff\xff\x00\x00\x00\x0c\x14\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00 \x00\x00\x00\x03\x00\x00\x00arr\x00\xa0\xff\xff\xff\x10\x00\x14\x00\x08\x00\x06\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x01\x02\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00item\x00\x00\x00\x00\x9e\xff\xff\xff@\x00\x00\x00\xd8\xff\xff\xff\x00\x00\x00\x04\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00s\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x10\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00id\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00@\x00\x00\x008\x01\x00\x00ARROW1' +ArrowStream +b'\xff\xff\xff\xff\x08\x01\x00\x00\x10\x00\x00\x00\x00\x00\n\x00\x0c\x00\x06\x00\x05\x00\x08\x00\n\x00\x00\x00\x00\x01\x04\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x03\x00\x00\x00\xa8\x00\x00\x00l\x00\x00\x00\x04\x00\x00\x00t\xff\xff\xff\x00\x00\x00\x0c\x14\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00 
\x00\x00\x00\x03\x00\x00\x00arr\x00\xa0\xff\xff\xff\x10\x00\x14\x00\x08\x00\x06\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x01\x02\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00item\x00\x00\x00\x00\x9e\xff\xff\xff@\x00\x00\x00\xd8\xff\xff\xff\x00\x00\x00\x04\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00s\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x10\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00id\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00@\x00\x00\x00\xff\xff\xff\xff(\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x04\x00\x18\x00\x00\x00(\x02\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x18\x00\x0c\x00\x04\x00\x08\x00\n\x00\x00\x00\xac\x00\x00\x00\x10\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00h\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x0
0\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00\x08\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x000123456789\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x06\x00\x00\x00\n\x00\x00\x00\x0f\x00\x00\x00\x15\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x0
0\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00' +Avro +b'Obj\x01\x04\x14avro.codec\x0csnappy\x16avro.schema\xb0\x02{"type":"record","name":"row","fields":[{"name":"id","type":"long"},{"name":"s","type":"string"},{"name":"arr","type":{"type":"array","items":"long"}}]}\x00VF=&\xb7\x91.u\xb7/\xe8{\xbe%\x91\xbb\x14\xbc\x01^\xc0\x00\x020\x00\x02\x021\x02\x00\x00\x04\x022\x04\x00\x02\x00\x06\x023\x06\x00\x02\x04\x00\x08\x024\x08\x00\x02\x04\x06\x00\n\x025\n\x00\x02\x04\x06\x08\x00\x0c\x026\x0c\x00\x01\n\x14\n\x00\x0e\x027\x0e\x01\x1e\x1c\x08\n\x0c\x00\x10\x028\x10\r\x0c<\x0e\x00\x12\x029\x12\x00\x02\x04\x06\x08\n\x0c\x0e\x10\x00x\xc9)kVF=&\xb7\x91.u\xb7/\xe8{\xbe%\x91\xbb' +CSV +b'0,"0","[]"\n1,"1","[0]"\n2,"2","[0,1]"\n3,"3","[0,1,2]"\n4,"4","[0,1,2,3]"\n5,"5","[0,1,2,3,4]"\n6,"6","[0,1,2,3,4,5]"\n7,"7","[0,1,2,3,4,5,6]"\n8,"8","[0,1,2,3,4,5,6,7]"\n9,"9","[0,1,2,3,4,5,6,7,8]"\n' +CSVWithNames +b'"id","s","arr"\n0,"0","[]"\n1,"1","[0]"\n2,"2","[0,1]"\n3,"3","[0,1,2]"\n4,"4","[0,1,2,3]"\n5,"5","[0,1,2,3,4]"\n6,"6","[0,1,2,3,4,5]"\n7,"7","[0,1,2,3,4,5,6]"\n8,"8","[0,1,2,3,4,5,6,7]"\n9,"9","[0,1,2,3,4,5,6,7,8]"\n' +CSVWithNamesAndTypes +b'"id","s","arr"\n"UInt64","String","Array(UInt64)"\n0,"0","[]"\n1,"1","[0]"\n2,"2","[0,1]"\n3,"3","[0,1,2]"\n4,"4","[0,1,2,3]"\n5,"5","[0,1,2,3,4]"\n6,"6","[0,1,2,3,4,5]"\n7,"7","[0,1,2,3,4,5,6]"\n8,"8","[0,1,2,3,4,5,6,7]"\n9,"9","[0,1,2,3,4,5,6,7,8]"\n' +CustomSeparated +b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +CustomSeparatedWithNames +b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +CustomSeparatedWithNamesAndTypes 
+b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +JSONColumns +b'{\n\t"id": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t"s": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t"arr": [[], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"], ["0","1","2","3","4","5"], ["0","1","2","3","4","5","6"], ["0","1","2","3","4","5","6","7"], ["0","1","2","3","4","5","6","7","8"]]\n}\n' +JSONCompactColumns +b'[\n\t["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t[[], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"], ["0","1","2","3","4","5"], ["0","1","2","3","4","5","6"], ["0","1","2","3","4","5","6","7"], ["0","1","2","3","4","5","6","7","8"]]\n]\n' +JSONCompactEachRow +b'["0", "0", []]\n["1", "1", ["0"]]\n["2", "2", ["0","1"]]\n["3", "3", ["0","1","2"]]\n["4", "4", ["0","1","2","3"]]\n["5", "5", ["0","1","2","3","4"]]\n["6", "6", ["0","1","2","3","4","5"]]\n["7", "7", ["0","1","2","3","4","5","6"]]\n["8", "8", ["0","1","2","3","4","5","6","7"]]\n["9", "9", ["0","1","2","3","4","5","6","7","8"]]\n' +JSONCompactEachRowWithNames +b'["id", "s", "arr"]\n["0", "0", []]\n["1", "1", ["0"]]\n["2", "2", ["0","1"]]\n["3", "3", ["0","1","2"]]\n["4", "4", ["0","1","2","3"]]\n["5", "5", ["0","1","2","3","4"]]\n["6", "6", ["0","1","2","3","4","5"]]\n["7", "7", ["0","1","2","3","4","5","6"]]\n["8", "8", ["0","1","2","3","4","5","6","7"]]\n["9", "9", ["0","1","2","3","4","5","6","7","8"]]\n' +JSONCompactEachRowWithNamesAndTypes +b'["id", "s", "arr"]\n["UInt64", "String", "Array(UInt64)"]\n["0", "0", []]\n["1", "1", ["0"]]\n["2", "2", ["0","1"]]\n["3", "3", ["0","1","2"]]\n["4", "4", ["0","1","2","3"]]\n["5", "5", ["0","1","2","3","4"]]\n["6", "6", ["0","1","2","3","4","5"]]\n["7", "7", 
["0","1","2","3","4","5","6"]]\n["8", "8", ["0","1","2","3","4","5","6","7"]]\n["9", "9", ["0","1","2","3","4","5","6","7","8"]]\n' +JSONCompactStringsEachRow +b'["0", "0", "[]"]\n["1", "1", "[0]"]\n["2", "2", "[0,1]"]\n["3", "3", "[0,1,2]"]\n["4", "4", "[0,1,2,3]"]\n["5", "5", "[0,1,2,3,4]"]\n["6", "6", "[0,1,2,3,4,5]"]\n["7", "7", "[0,1,2,3,4,5,6]"]\n["8", "8", "[0,1,2,3,4,5,6,7]"]\n["9", "9", "[0,1,2,3,4,5,6,7,8]"]\n' +JSONCompactStringsEachRowWithNames +b'["id", "s", "arr"]\n["0", "0", "[]"]\n["1", "1", "[0]"]\n["2", "2", "[0,1]"]\n["3", "3", "[0,1,2]"]\n["4", "4", "[0,1,2,3]"]\n["5", "5", "[0,1,2,3,4]"]\n["6", "6", "[0,1,2,3,4,5]"]\n["7", "7", "[0,1,2,3,4,5,6]"]\n["8", "8", "[0,1,2,3,4,5,6,7]"]\n["9", "9", "[0,1,2,3,4,5,6,7,8]"]\n' +JSONCompactStringsEachRowWithNamesAndTypes +b'["id", "s", "arr"]\n["UInt64", "String", "Array(UInt64)"]\n["0", "0", "[]"]\n["1", "1", "[0]"]\n["2", "2", "[0,1]"]\n["3", "3", "[0,1,2]"]\n["4", "4", "[0,1,2,3]"]\n["5", "5", "[0,1,2,3,4]"]\n["6", "6", "[0,1,2,3,4,5]"]\n["7", "7", "[0,1,2,3,4,5,6]"]\n["8", "8", "[0,1,2,3,4,5,6,7]"]\n["9", "9", "[0,1,2,3,4,5,6,7,8]"]\n' +JSONEachRow +b'{"id":"0","s":"0","arr":[]}\n{"id":"1","s":"1","arr":["0"]}\n{"id":"2","s":"2","arr":["0","1"]}\n{"id":"3","s":"3","arr":["0","1","2"]}\n{"id":"4","s":"4","arr":["0","1","2","3"]}\n{"id":"5","s":"5","arr":["0","1","2","3","4"]}\n{"id":"6","s":"6","arr":["0","1","2","3","4","5"]}\n{"id":"7","s":"7","arr":["0","1","2","3","4","5","6"]}\n{"id":"8","s":"8","arr":["0","1","2","3","4","5","6","7"]}\n{"id":"9","s":"9","arr":["0","1","2","3","4","5","6","7","8"]}\n' +JSONLines 
+b'{"id":"0","s":"0","arr":[]}\n{"id":"1","s":"1","arr":["0"]}\n{"id":"2","s":"2","arr":["0","1"]}\n{"id":"3","s":"3","arr":["0","1","2"]}\n{"id":"4","s":"4","arr":["0","1","2","3"]}\n{"id":"5","s":"5","arr":["0","1","2","3","4"]}\n{"id":"6","s":"6","arr":["0","1","2","3","4","5"]}\n{"id":"7","s":"7","arr":["0","1","2","3","4","5","6"]}\n{"id":"8","s":"8","arr":["0","1","2","3","4","5","6","7"]}\n{"id":"9","s":"9","arr":["0","1","2","3","4","5","6","7","8"]}\n' +JSONStringsEachRow +b'{"id":"0","s":"0","arr":"[]"}\n{"id":"1","s":"1","arr":"[0]"}\n{"id":"2","s":"2","arr":"[0,1]"}\n{"id":"3","s":"3","arr":"[0,1,2]"}\n{"id":"4","s":"4","arr":"[0,1,2,3]"}\n{"id":"5","s":"5","arr":"[0,1,2,3,4]"}\n{"id":"6","s":"6","arr":"[0,1,2,3,4,5]"}\n{"id":"7","s":"7","arr":"[0,1,2,3,4,5,6]"}\n{"id":"8","s":"8","arr":"[0,1,2,3,4,5,6,7]"}\n{"id":"9","s":"9","arr":"[0,1,2,3,4,5,6,7,8]"}\n' +MsgPack +b'\x00\xc4\x010\x90\x01\xc4\x011\x91\x00\x02\xc4\x012\x92\x00\x01\x03\xc4\x013\x93\x00\x01\x02\x04\xc4\x014\x94\x00\x01\x02\x03\x05\xc4\x015\x95\x00\x01\x02\x03\x04\x06\xc4\x016\x96\x00\x01\x02\x03\x04\x05\x07\xc4\x017\x97\x00\x01\x02\x03\x04\x05\x06\x08\xc4\x018\x98\x00\x01\x02\x03\x04\x05\x06\x07\t\xc4\x019\x99\x00\x01\x02\x03\x04\x05\x06\x07\x08' +NDJSON +b'{"id":"0","s":"0","arr":[]}\n{"id":"1","s":"1","arr":["0"]}\n{"id":"2","s":"2","arr":["0","1"]}\n{"id":"3","s":"3","arr":["0","1","2"]}\n{"id":"4","s":"4","arr":["0","1","2","3"]}\n{"id":"5","s":"5","arr":["0","1","2","3","4"]}\n{"id":"6","s":"6","arr":["0","1","2","3","4","5"]}\n{"id":"7","s":"7","arr":["0","1","2","3","4","5","6"]}\n{"id":"8","s":"8","arr":["0","1","2","3","4","5","6","7"]}\n{"id":"9","s":"9","arr":["0","1","2","3","4","5","6","7","8"]}\n' +Native 
+b'\x03\n\x02id\x06UInt64\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x01s\x06String\x010\x011\x012\x013\x014\x015\x016\x017\x018\x019\x03arr\rArray(UInt64)\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00\x00\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x
00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' +ORC +b'ORC\n\x0b\n\x03\x00\x00\x00\x12\x04\x08\nP\x00\n\x15\n\x05\x00\x00\x00\x00\x00\x12\x0c\x08\n\x12\x06\x08\x00\x10\x12\x18ZP\x00\n\x12\n\x06\x00\x00\x00\x00\x00\x00\x12\x08\x08\nB\x02\x08\x14P\x00\n\r\n\x05\x00\x00\x00\x00\x00\x12\x04\x08\nP\x00\n\x16\n\x05\x00\x00\x00\x00\x00\x12\r\x08-\x12\x07\x08\x00\x10\x10\x18\xf0\x01P\x00\xfe\xff\xc0\xfe\xff\xc0\x07\x01\x00\xfe\xff\xc0\x07\x00\x010123456789\xfe\xff\xc0\x07\x01\x00\x02\xff\xff\xf8\xfd\x00\x00\x02\x00\x01\x00\x01\x01\x00\x02\x01\x00\x03\x01\x00\x04\x01\x00\x05\x01\x00\x06\x01\x00\n\x06\x08\x06\x10\x00\x18\r\n\x06\x08\x06\x10\x01\x18\x17\n\x06\x08\x06\x10\x02\x18\x14\n\x06\x08\x06\x10\x03\x18\x0f\n\x06\x08\x06\x10\x04\x18\x18\n\x06\x08\x00\x10\x00\x18\x03\n\x06\x08\x00\x10\x01\x18\x03\n\x06\x08\x01\x10\x01\x18\x03\n\x06\x08\x00\x10\x02\x18\x03\n\x06\x08\x02\x10\x02\x18\x03\n\x06\x08\x01\x10\x02\x18\n\n\x06\x08\x00\x10\x03\x18\x03\n\x06\x08\x02\x10\x03\x18\x03\n\x06\x08\x00\x10\x04\x18\x04\n\x06\x08\x01\x10\x04\x18\x19\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x1a\x03GMT\n3\n\x04\x08\nP\x00\n\x0c\x08\n\x12\x06\x08\x00\x10\x12\x18ZP\x00\n\x08\x08\nB\x02\x08\x14P\x00\n\x04\x08\nP\x00\n\r\x08-\x12\x07\x08\x00\x10\x10\x18\xf0\x01P\x00\x08\x03\x10\xb6\x02\x1a\x0b\x08\x03\x10_\x18< \x9b\x01(\n"\x19\x08\x0c\x12\x03\x01\x02\x03\x1a\x02id\x1a\x01s\x1a\x03arr \x00(\x000\x00"\x08\x08\x04 \x00(\x000\x00"\x08\x08\x08 \x00(\x000\x00"\x0b\x08\n\x12\x01\x04 \x00(\x000\x00"\x08\x08\x04 \x00(\x000\x000\n:\x04\x08\nP\x00:\x0c\x08\n\x12\x06\x08\x00\x10\x12\x18ZP\x00:\x08\x08\nB\x02\x08\x14P\x00:\x04\x08\nP\x00:\r\x08-\x12\x07\x08\x00\x10\x10\x18\xf0\x01P\x00@\x90NH\x01\x08\x92\x01\x10\x00\x18\x80\x80\x04"\x02\x00\x0b(50\x06\x82\xf4\x03\x03ORC\x18' +Parquet 
+b'PAR1\x15\x04\x15\xa0\x01\x15dL\x15\x14\x15\x04\x12\x00\x00P\x00\x00\r\x01\x00\x01\r\x08\x00\x02\r\x08\x00\x03\r\x08\x00\x04\r\x08\x00\x05\r\x08\x00\x06\r\x08\x00\x07\r\x08<\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x14\x15\x18,\x15\x14\x15\x04\x15\x06\x15\x06\x1c6\x00(\x08\t\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n$\x04\x05\x102Tv\x98\x00\x00\x00&\xf4\x01\x1c\x15\x04\x195\x04\x00\x06\x19\x18\x02id\x15\x02\x16\x14\x16\xa4\x02\x16\xec\x01&\x8a\x01&\x08\x1c6\x00(\x08\t\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x15\x04\x15d\x15\\L\x15\x14\x15\x04\x12\x00\x002\x10\x01\x00\x00\x000\x01\x05\x001\x01\x05\x002\x01\x05\x003\x01\x05\x004\x01\x05\x005\x01\x05<6\x01\x00\x00\x007\x01\x00\x00\x008\x01\x00\x00\x009\x15\x00\x15\x14\x15\x18,\x15\x14\x15\x04\x15\x06\x15\x06\x1c6\x00(\x019\x18\x010\x00\x00\x00\n$\x04\x05\x102Tv\x98\x00\x00\x00&\xcc\x04\x1c\x15\x0c\x195\x04\x00\x06\x19\x18\x01s\x15\x02\x16\x14\x16\xca\x01\x16\xc6\x01&\xfe\x03&\x86\x03\x1c6\x00(\x019\x18\x010\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x15\x04\x15\x90\x01\x15\\L\x15\x12\x15\x04\x12\x00\x00H\x00\x00\r\x01\x00\x01\r\x08\x00\x02\r\x08\x00\x03\r\x08\x00\x04\r\x08\x00\x05\r\x08\x00\x06\r\x08<\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\\\x15`,\x15\\\x15\x04\x15\x06\x15\x06\x1c6\x02(\x08\x08\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00.\xb4\x07\x00\x00\x00\rh\xf7\xbe\xdf\xdf?\x05\x00\x00\x00\x03\xa8\xaaL\x02\x04\r\x00\x01!\x102\x102\x04!C\x05!Ce\x102Tv\x102Tv\x08\x00&\xee\x07\x1c\x15\x04\x195\x04\x00\x06\x198\x03arr\x04list\x04item\x15\x02\x16\\\x16\xdc\x02\x16\xac\x02&\xbc\x06&\xc2\x05\x1c6\x02(\x08\x08\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x
15\x02\x19l5\x00\x18\x06schema\x15\x06\x00\x15\x04%\x00\x18\x02id%\x1cL\xac\x13@\x12\x00\x00\x00\x15\x0c%\x00\x18\x01s\x005\x00\x18\x03arr\x15\x02\x15\x06L<\x00\x00\x005\x04\x18\x04list\x15\x02\x00\x15\x04%\x02\x18\x04item%\x1cL\xac\x13@\x12\x00\x00\x00\x16\x14\x19\x1c\x19<&\xf4\x01\x1c\x15\x04\x195\x04\x00\x06\x19\x18\x02id\x15\x02\x16\x14\x16\xa4\x02\x16\xec\x01&\x8a\x01&\x08\x1c6\x00(\x08\t\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00&\xcc\x04\x1c\x15\x0c\x195\x04\x00\x06\x19\x18\x01s\x15\x02\x16\x14\x16\xca\x01\x16\xc6\x01&\xfe\x03&\x86\x03\x1c6\x00(\x019\x18\x010\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00&\xee\x07\x1c\x15\x04\x195\x04\x00\x06\x198\x03arr\x04list\x04item\x15\x02\x16\\\x16\xdc\x02\x16\xac\x02&\xbc\x06&\xc2\x05\x1c6\x02(\x08\x08\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x16\xca\x06\x16\x14&\x08\x16\xde\x05\x14\x00\x00("parquet-cpp version 1.5.1-SNAPSHOT\x19<\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00v\x01\x00\x00PAR1' +RowBinary 
+b'\x00\x00\x00\x00\x00\x00\x00\x00\x010\x00\x01\x00\x00\x00\x00\x00\x00\x00\x011\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x012\x02\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x013\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x014\x04\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x015\x05\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x016\x06\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x017\x07\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x018\x08\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x019\t\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' +RowBinaryWithNames 
+b'\x03\x02id\x01s\x03arr\x00\x00\x00\x00\x00\x00\x00\x00\x010\x00\x01\x00\x00\x00\x00\x00\x00\x00\x011\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x012\x02\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x013\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x014\x04\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x015\x05\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x016\x06\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x017\x07\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x018\x08\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x019\t\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' +RowBinaryWithNamesAndTypes 
+b'\x03\x02id\x01s\x03arr\x06UInt64\x06String\rArray(UInt64)\x00\x00\x00\x00\x00\x00\x00\x00\x010\x00\x01\x00\x00\x00\x00\x00\x00\x00\x011\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x012\x02\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x013\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x014\x04\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x015\x05\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x016\x06\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x017\x07\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x018\x08\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x019\t\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' +TSKV 
+b'id=0\ts=0\tarr=[]\nid=1\ts=1\tarr=[0]\nid=2\ts=2\tarr=[0,1]\nid=3\ts=3\tarr=[0,1,2]\nid=4\ts=4\tarr=[0,1,2,3]\nid=5\ts=5\tarr=[0,1,2,3,4]\nid=6\ts=6\tarr=[0,1,2,3,4,5]\nid=7\ts=7\tarr=[0,1,2,3,4,5,6]\nid=8\ts=8\tarr=[0,1,2,3,4,5,6,7]\nid=9\ts=9\tarr=[0,1,2,3,4,5,6,7,8]\n' +TSV +b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TSVRaw +b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TSVRawWithNames +b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TSVRawWithNamesAndTypes +b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TSVWithNames +b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TSVWithNamesAndTypes +b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TabSeparated +b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TabSeparatedRaw +b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' 
+TabSeparatedRawWithNames +b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TabSeparatedRawWithNamesAndTypes +b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TabSeparatedWithNames +b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +TabSeparatedWithNamesAndTypes +b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' +Values +b"(0,'0',[]),(1,'1',[0]),(2,'2',[0,1]),(3,'3',[0,1,2]),(4,'4',[0,1,2,3]),(5,'5',[0,1,2,3,4]),(6,'6',[0,1,2,3,4,5]),(7,'7',[0,1,2,3,4,5,6]),(8,'8',[0,1,2,3,4,5,6,7]),(9,'9',[0,1,2,3,4,5,6,7,8])" +LineAsString +b'0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n' +OK From 02679c72225e7f89b59d7ed49d6cba5c577f344e Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 10 May 2022 16:27:59 +0000 Subject: [PATCH 091/615] Fix tests --- .../Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp | 2 +- src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h | 2 +- src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp | 4 ---- tests/queries/1_stateful/00023_totals_limit.reference | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp index 1d0bac914e1..b2c4a8b5283 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp +++ 
b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp @@ -181,7 +181,7 @@ JSONColumnsBaseSchemaReader::JSONColumnsBaseSchemaReader( { } -void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) +void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const { auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second) { diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h index fea8b03b809..e912ec4c08e 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h @@ -83,7 +83,7 @@ private: DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read); /// Choose result type for column from two inferred types from different rows. 
- void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row); + void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const; const FormatSettings format_settings; std::unique_ptr reader; diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 36e2aabf7f8..730907ba45c 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -24,10 +24,6 @@ void PrettySpaceBlockOutputFormat::write(Chunk chunk, PortKind port_kind) const auto & header = getPort(port_kind).getHeader(); const auto & columns = chunk.getColumns(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getSerialization(*columns[i]->getSerializationInfo()); - WidthsPerColumn widths; Widths max_widths; Widths name_widths; diff --git a/tests/queries/1_stateful/00023_totals_limit.reference b/tests/queries/1_stateful/00023_totals_limit.reference index fc4a02662d7..c76452411d7 100644 --- a/tests/queries/1_stateful/00023_totals_limit.reference +++ b/tests/queries/1_stateful/00023_totals_limit.reference @@ -16,7 +16,7 @@ [1604017, "189"] ], - "totals": [0,"4652"], + "totals": [0, "4652"], "rows": 1, From 9ed1e4f7e68191916c5f66807a6186f80a8b0d9a Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 10 May 2022 16:36:56 +0000 Subject: [PATCH 092/615] fix build --- src/Storages/WindowView/StorageWindowView.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index ade544d191f..c5d050a9f0b 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -608,8 +608,8 @@ inline void 
StorageWindowView::fire(UInt32 watermark) ASTPtr StorageWindowView::getSourceTableSelectQuery() { - auto select_query_ = select_query->clone(); - auto & modified_select = select_query_->as(); + auto query = select_query->clone(); + auto & modified_select = query->as(); if (hasJoin(modified_select)) { @@ -629,13 +629,13 @@ ASTPtr StorageWindowView::getSourceTableSelectQuery() if (!is_time_column_func_now) { - auto select_query_ = select_query->clone(); + auto query = select_query->clone(); DropTableIdentifierMatcher::Data drop_table_identifier_data; DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); - drop_table_identifier_visitor.visit(select_query_); + drop_table_identifier_visitor.visit(query); FetchQueryInfoMatcher::Data query_info_data; - FetchQueryInfoMatcher::Visitor(query_info_data).visit(select_query_); + FetchQueryInfoMatcher::Visitor(query_info_data).visit(query); auto order_by = std::make_shared(); auto order_by_elem = std::make_shared(); @@ -649,7 +649,7 @@ ASTPtr StorageWindowView::getSourceTableSelectQuery() const auto select_with_union_query = std::make_shared(); select_with_union_query->list_of_selects = std::make_shared(); - select_with_union_query->list_of_selects->children.push_back(select_query_); + select_with_union_query->list_of_selects->children.push_back(query); return select_with_union_query; } From e48675491f67ee6795114827b6a606bd05f80ca3 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 11 May 2022 08:31:58 +0900 Subject: [PATCH 093/615] try building again From 7536e0cd258379e6153aa777a00fdd73a1fd342e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 11 May 2022 03:16:10 +0200 Subject: [PATCH 094/615] Fix MSan --- programs/main.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/programs/main.cpp b/programs/main.cpp index 46c4ace1df9..29af4a29a54 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -455,6 +455,11 @@ int main(int argc_, char ** 
argv_) inside_main = true; SCOPE_EXIT({ inside_main = false; }); + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. + updatePHDRCache(); + /// Drop privileges if needed. try { @@ -468,16 +473,10 @@ int main(int argc_, char ** argv_) checkHarmfulEnvironmentVariables(); - /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. std::set_new_handler(nullptr); - /// PHDR cache is required for query profiler to work reliably - /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) - /// will work only after additional call of this function. - updatePHDRCache(); - std::vector argv(argv_, argv_ + argc_); /// Print a basic help if nothing was matched From f3c3935df3ffcbe18d410c4f079631f083ce9565 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 11 May 2022 03:18:28 +0200 Subject: [PATCH 095/615] Fix shellcheck --- docker/server/entrypoint.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 9d337e53a68..f1927fd32b0 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -95,8 +95,8 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)" # Drop privileges - CLICKHOUSE_SETUID="${USER}" - CLICKHOUSE_SETGID="${GROUP}" + export CLICKHOUSE_SETUID="${USER}" + export CLICKHOUSE_SETGID="${GROUP}" # Listen only on localhost until the initialization is done /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & From e40396a0e56ac3fd142648c3cd80e1495ec99bff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 11 May 2022 03:21:08 +0200 
Subject: [PATCH 096/615] Fix Docker --- docker/server/entrypoint.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index f1927fd32b0..996e58d09ad 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -90,14 +90,16 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL EOT fi +# Drop privileges +CLICKHOUSE_SETUID="${USER}" +CLICKHOUSE_SETGID="${GROUP}" +export CLICKHOUSE_SETUID +export CLICKHOUSE_SETGID + if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # port is needed to check if clickhouse-server is ready for connections HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)" - # Drop privileges - export CLICKHOUSE_SETUID="${USER}" - export CLICKHOUSE_SETGID="${GROUP}" - # Listen only on localhost until the initialization is done /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & pid="$!" 
@@ -155,7 +157,7 @@ if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then # so the container can't be finished by ctrl+c CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0} export CLICKHOUSE_WATCHDOG_ENABLE - exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@" + /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@" fi # Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image From 15672a8374ed20bec4d243a574680c8c210573b5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 11 May 2022 07:53:32 +0000 Subject: [PATCH 097/615] Fix integration tests --- src/Coordination/KeeperStorage.cpp | 10 +++++++--- src/Coordination/ZooKeeperDataReader.cpp | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 63bb258a604..058f9c0ce7b 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -335,7 +335,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i else if constexpr (std::same_as) { auto node_it = container.find(path); - if (node_it != container.end()) + if (node_it == container.end()) fail(); if (operation.version != -1 && operation.version != node_it->value.stat.aversion) @@ -532,6 +532,7 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session if (current_nodes.hasACL(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) return true; + for (const auto & node_acl : node_acls) { if (node_acl.permissions & permission) @@ -1439,7 +1440,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc else { KeeperStorage::AuthID new_auth{auth_request.scheme, digest}; - if (storage.current_nodes.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) + if (!storage.current_nodes.hasACL(session_id, false, [&](const auto 
& auth_id) { return new_auth == auth_id; })) new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); } @@ -1654,7 +1655,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( /// Original ZooKeeper always throws no auth, even when user provided some credentials response->error = Coordination::Error::ZNOAUTH; } - response = request_processor->processLocal(*this, zxid, session_id, time); + else + { + response = request_processor->processLocal(*this, zxid, session_id, time); + } } else { diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index e59c67329ff..4d1745edc6a 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -520,6 +520,7 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*l if (request->getOpNum() == Coordination::OpNum::Multi && hasErrorsInMultiRequest(request)) return true; + storage.preprocessRequest(request, session_id, time, zxid, /* check_acl = */ false); storage.processRequest(request, session_id, time, zxid, /* check_acl = */ false); } } From e4f43d0b01d552f8f9193eb9d920594180f3e324 Mon Sep 17 00:00:00 2001 From: Vxider Date: Wed, 11 May 2022 08:47:08 +0000 Subject: [PATCH 098/615] update getSampleBlock for windowview --- src/Interpreters/InterpreterInsertQuery.cpp | 5 ++++- src/Storages/WindowView/StorageWindowView.cpp | 10 ++++++---- src/Storages/WindowView/StorageWindowView.h | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 8408b0ac5fc..01a1e063ee3 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -101,7 +102,9 @@ Block InterpreterInsertQuery::getSampleBlock( /// If the query does not include information about columns if 
(!query.columns) { - if (no_destination) + if (auto * window_view = dynamic_cast(table.get())) + return window_view->getHeader(); + else if (no_destination) return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); else return metadata_snapshot->getSampleBlockNonMaterialized(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index c5d050a9f0b..cf4bdaa7b3d 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1050,6 +1050,7 @@ StorageWindowView::StorageWindowView( const StorageID & table_id_, ContextPtr context_, const ASTCreateQuery & query, + const ColumnsDescription & columns_, bool attach_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -1058,6 +1059,10 @@ StorageWindowView::StorageWindowView( if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); + if (query.select->list_of_selects->children.size() != 1) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, @@ -1067,9 +1072,6 @@ StorageWindowView::StorageWindowView( source_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)) .getSampleBlock(); - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription(source_header.getNamesAndTypesList())); - setInMemoryMetadata(storage_metadata); String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; @@ -1587,7 +1589,7 @@ void registerStorageWindowView(StorageFactory & factory) "Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')", ErrorCodes::SUPPORT_IS_DISABLED); - return std::make_shared(args.table_id, args.getLocalContext(), args.query, 
args.attach); + return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.attach); }); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index a95ea2aa715..f7b6035b241 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -108,6 +108,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ASTCreateQuery & query, + const ColumnsDescription & columns_, bool attach_); String getName() const override { return "WindowView"; } From 4411fd87c863d63876ebe43192e44a96c90664e3 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 11 May 2022 16:49:30 +0800 Subject: [PATCH 099/615] reading optimization when all columns to read are partition keys --- src/Storages/Hive/StorageHive.cpp | 82 +++++++++++++++++++++---------- src/Storages/Hive/StorageHive.h | 2 - 2 files changed, 55 insertions(+), 29 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 09c2f578419..1bacb9cb72e 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -151,13 +151,26 @@ public: { if (!reader) { + if (current_file_remained_rows) [[unlikely]] + { + return generateChunkByPartitionKeys(); + } + current_idx = source_info->next_uri_to_read.fetch_add(1); if (current_idx >= source_info->hive_files.size()) return {}; - const auto & current_file = source_info->hive_files[current_idx]; + current_file = source_info->hive_files[current_idx]; current_path = current_file->getPath(); + if (!to_read_block.columns() && current_file->getRows()) + { + /// this is the case that all columns to read are partition keys. We can construct const columns + /// directly without reading from hive files. 
+ current_file_remained_rows = *(current_file->getRows()); + return generateChunkByPartitionKeys(); + } + String uri_with_path = hdfs_namenode_url + current_path; auto compression = chooseCompressionMethod(current_path, compression_method); std::unique_ptr raw_read_buf; @@ -260,6 +273,45 @@ public: } } + Chunk generateChunkByPartitionKeys() + { + size_t max_rows = getContext()->getSettings().max_block_size; + size_t rows = 0; + if (max_rows > current_file_remained_rows) + { + rows = current_file_remained_rows; + current_file_remained_rows = 0; + } + else + { + rows = max_rows; + current_file_remained_rows -= max_rows; + } + + Columns cols; + auto types = source_info->partition_name_types.getTypes(); + auto names = source_info->partition_name_types.getNames(); + auto fields = current_file->getPartitionValues(); + for (size_t i = 0, sz = types.size(); i < sz; ++i) + { + if (!sample_block.has(names[i])) + continue; + auto col = types[i]->createColumnConst(rows, fields[i]); + auto col_idx = sample_block.getPositionByName(names[i]); + cols.insert(cols.begin() + col_idx, col); + } + + if (source_info->need_file_column) + { + size_t last_slash_pos = current_file->getPath().find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); + + auto col = DataTypeLowCardinality{std::make_shared()}.createColumnConst(rows, std::move(file_name)); + cols.push_back(col); + } + return Chunk(std::move(cols), rows); + } + private: std::unique_ptr read_buf; std::unique_ptr pipeline; @@ -275,8 +327,10 @@ private: const Names & text_input_field_names; FormatSettings format_settings; + HiveFilePtr current_file; String current_path; size_t current_idx = 0; + size_t current_file_remained_rows = 0; Poco::Logger * log = &Poco::Logger::get("StorageHive"); }; @@ -627,30 +681,6 @@ bool StorageHive::isColumnOriented() const return format_name == "Parquet" || format_name == "ORC"; } -void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & header_block, const 
NameSet & partition_columns) const -{ - if (!isColumnOriented()) - sample_block = header_block; - UInt32 erased_columns = 0; - for (const auto & column : partition_columns) - { - if (sample_block.has(column)) - erased_columns++; - } - if (erased_columns == sample_block.columns()) - { - for (size_t i = 0; i < header_block.columns(); ++i) - { - const auto & col = header_block.getByPosition(i); - if (!partition_columns.count(col.name)) - { - sample_block.insert(col); - break; - } - } - } -} - Pipe StorageHive::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -690,8 +720,6 @@ Pipe StorageHive::read( sources_info->need_file_column = true; } - getActualColumnsToRead(sample_block, header_block, NameSet{partition_names.begin(), partition_names.end()}); - if (num_streams > sources_info->hive_files.size()) num_streams = sources_info->hive_files.size(); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index d61bb184574..1b37a0afd15 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -117,8 +117,6 @@ private: const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; - void getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const; - void lazyInitialize(); std::optional From 0fb11ab3ff4ebc09bf4cb3265d59e09ac35ce423 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 11 May 2022 09:08:39 +0000 Subject: [PATCH 100/615] Rename uncommitted state --- src/Coordination/KeeperStateMachine.h | 2 - src/Coordination/KeeperStorage.cpp | 72 +++++++++++++-------------- src/Coordination/KeeperStorage.h | 7 +-- 3 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 32dadab6570..aed96a59c13 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -29,7 +29,6 @@ public: void 
preprocess(uint64_t log_idx, nuraft::buffer & data); - /// Currently not supported nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT @@ -37,7 +36,6 @@ public: /// Save new cluster config to our snapshot (copy of the config stored in StateManager) void commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) override; /// NOLINT - /// Currently not supported void rollback(uint64_t log_idx, nuraft::buffer & data) override; uint64_t last_commit_index() override { return last_committed_idx; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 058f9c0ce7b..fab37aec1bf 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -189,7 +189,7 @@ struct Overloaded : Ts... template Overloaded(Ts...) -> Overloaded; -std::shared_ptr KeeperStorage::CurrentNodes::getNode(StringRef path) +std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) { std::shared_ptr node{nullptr}; @@ -228,7 +228,7 @@ std::shared_ptr KeeperStorage::CurrentNodes::getNode(String return node; } -bool KeeperStorage::CurrentNodes::hasNode(StringRef path) const +bool KeeperStorage::UncommittedState::hasNode(StringRef path) const { bool exists = storage.container.contains(std::string{path}); applyDeltas( @@ -250,7 +250,7 @@ bool KeeperStorage::CurrentNodes::hasNode(StringRef path) const return exists; } -Coordination::ACLs KeeperStorage::CurrentNodes::getACLs(StringRef path) const +Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const { std::optional acl_id; if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) @@ -290,7 +290,7 @@ namespace Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_id) { - for (auto & delta : current_nodes.deltas) + for (auto & delta : uncommitted_state.deltas) { if (delta.zxid > 
commit_zxid) break; @@ -519,7 +519,7 @@ namespace return storage.acl_map.convertNumber(node_it->value.acl_id); } - return storage.current_nodes.getACLs(path); + return storage.uncommitted_state.getACLs(path); } } @@ -529,7 +529,7 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session if (node_acls.empty()) return true; - if (current_nodes.hasACL(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) + if (uncommitted_state.hasACL(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) return true; @@ -540,7 +540,7 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session if (node_acl.scheme == "world" && node_acl.id == "anyone") return true; - if (current_nodes.hasACL( + if (uncommitted_state.hasACL( session_id, is_local, [&](const auto & auth_id) { return auth_id.scheme == node_acl.scheme && auth_id.id == node_acl.id; })) @@ -575,7 +575,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; auto parent_path = parentPath(request.path); - auto parent_node = storage.current_nodes.getNode(parent_path); + auto parent_node = storage.uncommitted_state.getNode(parent_path); if (parent_node == nullptr) return {{zxid, Coordination::Error::ZNONODE}}; @@ -594,7 +594,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr path_created += seq_num_str.str(); } - if (storage.current_nodes.hasNode(path_created)) + if (storage.uncommitted_state.hasNode(path_created)) return {{zxid, Coordination::Error::ZNODEEXISTS}}; if (getBaseName(path_created).size == 0) @@ -653,7 +653,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr return response_ptr; } - const auto & deltas = storage.current_nodes.deltas; + const auto & deltas = storage.uncommitted_state.deltas; auto create_delta_it = std::find_if( deltas.begin(), deltas.end(), @@ -683,7 +683,7 @@ struct 
KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce { Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - if (!storage.current_nodes.hasNode(request.path)) + if (!storage.uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; return {}; @@ -759,7 +759,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr const auto update_parent_pzxid = [&]() { auto parent_path = parentPath(request.path); - if (!storage.current_nodes.hasNode(parent_path)) + if (!storage.uncommitted_state.hasNode(parent_path)) return; new_deltas.emplace_back( @@ -772,7 +772,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr }}); }; - auto node = storage.current_nodes.getNode(request.path); + auto node = storage.uncommitted_state.getNode(request.path); if (!node) { @@ -827,7 +827,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr { Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); - if (!storage.current_nodes.hasNode(request.path)) + if (!storage.uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; return {}; @@ -897,10 +897,10 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce std::vector new_deltas; - if (!storage.current_nodes.hasNode(request.path)) + if (!storage.uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; - auto node = storage.current_nodes.getNode(request.path); + auto node = storage.uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->stat.version) return {{zxid, Coordination::Error::ZBADVERSION}}; @@ -978,7 +978,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc { Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); - if (!storage.current_nodes.hasNode(request.path)) + if 
(!storage.uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; return {}; @@ -1058,10 +1058,10 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro { Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); - if (!storage.current_nodes.hasNode(request.path)) + if (!storage.uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; - auto node = storage.current_nodes.getNode(request.path); + auto node = storage.uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->stat.version) return {{zxid, Coordination::Error::ZBADVERSION}}; @@ -1135,11 +1135,11 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr { Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); - auto & current_nodes = storage.current_nodes; - if (!current_nodes.hasNode(request.path)) + auto & uncommitted_state = storage.uncommitted_state; + if (!uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; - auto node = current_nodes.getNode(request.path); + auto node = uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->stat.aversion) return {{zxid, Coordination::Error::ZBADVERSION}}; @@ -1192,7 +1192,7 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr { Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); - if (!storage.current_nodes.hasNode(request.path)) + if (!storage.uncommitted_state.hasNode(request.path)) return {{zxid, Coordination::Error::ZNONODE}}; return {}; @@ -1292,7 +1292,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { // manually add deltas so that the result of previous request in the transaction is used in the 
next request - auto & saved_deltas = storage.current_nodes.deltas; + auto & saved_deltas = storage.uncommitted_state.deltas; std::vector response_errors; response_errors.reserve(concrete_requests.size()); @@ -1330,7 +1330,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); - auto & deltas = storage.current_nodes.deltas; + auto & deltas = storage.uncommitted_state.deltas; if (auto * failed_multi = std::get_if(&deltas.front().operation)) { for (size_t i = 0; i < concrete_requests.size(); ++i) @@ -1440,7 +1440,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc else { KeeperStorage::AuthID new_auth{auth_request.scheme, digest}; - if (!storage.current_nodes.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) + if (!storage.uncommitted_state.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); } @@ -1546,13 +1546,13 @@ void KeeperStorage::preprocessRequest( if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { - auto & deltas = current_nodes.deltas; + auto & deltas = uncommitted_state.deltas; auto session_ephemerals = ephemerals.find(session_id); if (session_ephemerals != ephemerals.end()) { for (const auto & ephemeral_path : session_ephemerals->second) { - if (current_nodes.hasNode(ephemeral_path)) + if (uncommitted_state.hasNode(ephemeral_path)) { deltas.emplace_back( parentPath(ephemeral_path).toString(), @@ -1573,13 +1573,13 @@ void KeeperStorage::preprocessRequest( if (check_acl && !request_processor->checkAuth(*this, session_id, false)) { - current_nodes.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH); + 
uncommitted_state.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH); return; } auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); - current_nodes.deltas.insert( - current_nodes.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); + uncommitted_state.deltas.insert( + uncommitted_state.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); } KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( @@ -1606,7 +1606,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( { commit(zxid, session_id); - for (const auto & delta : current_nodes.deltas) + for (const auto & delta : uncommitted_state.deltas) { if (delta.zxid > zxid) break; @@ -1618,7 +1618,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } } - std::erase_if(current_nodes.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); + std::erase_if(uncommitted_state.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); clearDeadWatches(session_id); auto auth_it = session_and_auth.find(session_id); @@ -1663,7 +1663,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( else { response = request_processor->process(*this, zxid, session_id, time); - std::erase_if(current_nodes.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); + std::erase_if(uncommitted_state.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); } /// Watches for this requests are added to the watches lists @@ -1706,8 +1706,8 @@ void KeeperStorage::rollbackRequest(int64_t rollback_zxid) { // we can only rollback the last zxid (if there is any) // if there is a delta with a larger zxid, we have invalid state - assert(current_nodes.deltas.empty() || current_nodes.deltas.back().zxid <= rollback_zxid); - std::erase_if(current_nodes.deltas, [rollback_zxid](const auto & delta) { return 
delta.zxid == rollback_zxid; }); + assert(uncommitted_state.deltas.empty() || uncommitted_state.deltas.back().zxid <= rollback_zxid); + std::erase_if(uncommitted_state.deltas, [rollback_zxid](const auto & delta) { return delta.zxid == rollback_zxid; }); } void KeeperStorage::clearDeadWatches(int64_t session_id) diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 6a9239f4cee..cf85c366789 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -136,6 +136,7 @@ public: std::vector error_codes; }; + // Denotes end of a subrequest in multi request struct SubDeltaEnd { }; @@ -162,9 +163,9 @@ public: Operation operation; }; - struct CurrentNodes + struct UncommittedState { - explicit CurrentNodes(KeeperStorage & storage_) : storage(storage_) { } + explicit UncommittedState(KeeperStorage & storage_) : storage(storage_) { } template void applyDeltas(StringRef path, const Visitor & visitor) const @@ -207,7 +208,7 @@ public: KeeperStorage & storage; }; - CurrentNodes current_nodes{*this}; + UncommittedState uncommitted_state{*this}; Coordination::Error commit(int64_t zxid, int64_t session_id); From b2aa1802cdc4cd0d12ae9a6c50146d9ea3d7e41e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 11 May 2022 09:08:55 +0000 Subject: [PATCH 101/615] Add unit test for basic CRUD with preprocessing --- src/Coordination/tests/gtest_coordination.cpp | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index d2545550c4f..236b33d4497 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1,6 +1,8 @@ #include #include +#include "Common/ZooKeeper/IKeeper.h" +#include "Coordination/KeeperStorage.h" #include "config_core.h" #if USE_NURAFT @@ -1736,6 +1738,130 @@ TEST_P(CoordinationTest, TestLogGap) EXPECT_EQ(changelog1.next_slot(), 61); } +template 
+ResponseType getSingleResponse(const auto & responses) +{ + EXPECT_FALSE(responses.empty()); + return dynamic_cast(*responses[0].response); +} + +TEST_P(CoordinationTest, TestUncommittedStateBasicCRUD) +{ + using namespace DB; + using namespace Coordination; + + DB::KeeperStorage storage{500, ""}; + + constexpr std::string_view path = "/test"; + + const auto get_committed_data = [&]() -> std::optional + { + auto request = std::make_shared(); + request->path = path; + auto responses = storage.processRequest(request, 0, 0, std::nullopt, true, true); + const auto & get_response = getSingleResponse(responses); + + if (get_response.error != Error::ZOK) + return std::nullopt; + + return get_response.data; + }; + + const auto preprocess_get = [&](int64_t zxid) + { + auto get_request = std::make_shared(); + get_request->path = path; + storage.preprocessRequest(get_request, 0, 0, zxid); + return get_request; + }; + + const auto create_request = std::make_shared(); + create_request->path = path; + create_request->data = "initial_data"; + storage.preprocessRequest(create_request, 0, 0, 1); + storage.preprocessRequest(create_request, 0, 0, 2); + + ASSERT_FALSE(get_committed_data()); + + const auto after_create_get = preprocess_get(3); + + ASSERT_FALSE(get_committed_data()); + + const auto set_request = std::make_shared(); + set_request->path = path; + set_request->data = "new_data"; + storage.preprocessRequest(set_request, 0, 0, 4); + + const auto after_set_get = preprocess_get(5); + + ASSERT_FALSE(get_committed_data()); + + const auto remove_request = std::make_shared(); + remove_request->path = path; + storage.preprocessRequest(remove_request, 0, 0, 6); + storage.preprocessRequest(remove_request, 0, 0, 7); + + const auto after_remove_get = preprocess_get(8); + + ASSERT_FALSE(get_committed_data()); + + { + const auto responses = storage.processRequest(create_request, 0, 0, 1); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, 
Error::ZOK); + } + + { + const auto responses = storage.processRequest(create_request, 0, 0, 2); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZNODEEXISTS); + } + + { + const auto responses = storage.processRequest(after_create_get, 0, 0, 3); + const auto & get_response = getSingleResponse(responses); + ASSERT_EQ(get_response.error, Error::ZOK); + ASSERT_EQ(get_response.data, "initial_data"); + } + + ASSERT_EQ(get_committed_data(), "initial_data"); + + { + const auto responses = storage.processRequest(set_request, 0, 0, 4); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZOK); + } + + { + const auto responses = storage.processRequest(after_set_get, 0, 0, 5); + const auto & get_response = getSingleResponse(responses); + ASSERT_EQ(get_response.error, Error::ZOK); + ASSERT_EQ(get_response.data, "new_data"); + } + + ASSERT_EQ(get_committed_data(), "new_data"); + + { + const auto responses = storage.processRequest(remove_request, 0, 0, 6); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZOK); + } + + { + const auto responses = storage.processRequest(remove_request, 0, 0, 7); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZNONODE); + } + + { + const auto responses = storage.processRequest(after_remove_get, 0, 0, 8); + const auto & get_response = getSingleResponse(responses); + ASSERT_EQ(get_response.error, Error::ZNONODE); + } + + ASSERT_FALSE(get_committed_data()); +} + INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, CoordinationTest, From e6d187001cf678809ecaad4bf8f29a47571aaa0b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 11 May 2022 12:10:17 +0000 Subject: [PATCH 102/615] Fix unit tests and modify messages --- src/Coordination/KeeperStorage.cpp | 49 +++++++++++-------- src/Coordination/KeeperStorage.h | 1 - 
src/Coordination/tests/gtest_coordination.cpp | 4 ++ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index fab37aec1bf..86be0666fc0 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -265,6 +265,12 @@ Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) cons assert(!acl_id); acls = &create_delta.acls; }, + [&](const RemoveNodeDelta & /*remove_delta*/) + { + assert(acl_id || acls); + acl_id.reset(); + acls = nullptr; + }, [&](const SetACLDelta & set_acl_delta) { assert(acl_id || acls); @@ -281,11 +287,13 @@ Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) cons namespace { - [[noreturn]] void fail() - { - LOG_INFO(&Poco::Logger::get("KeeperStorage"), "Inconsistency found, terminating"); - std::terminate(); - } + +[[noreturn]] void onStorageInconsistency() +{ + LOG_INFO(&Poco::Logger::get("KeeperStorage"), "Inconsistency found between uncommitted and committed data. 
Keeper will terminate to avoid undefined behaviour."); + std::terminate(); +} + } Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_id) @@ -309,7 +317,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i operation.is_ephemeral, std::move(operation.acls), session_id)) - fail(); + onStorageInconsistency(); return Coordination::Error::ZOK; } @@ -317,10 +325,10 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i { auto node_it = container.find(path); if (node_it == container.end()) - fail(); + onStorageInconsistency(); if (operation.version != -1 && operation.version != node_it->value.stat.version) - fail(); + onStorageInconsistency(); container.updateValue(path, operation.update_fn); return Coordination::Error::ZOK; @@ -328,7 +336,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i else if constexpr (std::same_as) { if (!removeNode(path, operation.version)) - fail(); + onStorageInconsistency(); return Coordination::Error::ZOK; } @@ -336,10 +344,10 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i { auto node_it = container.find(path); if (node_it == container.end()) - fail(); + onStorageInconsistency(); if (operation.version != -1 && operation.version != node_it->value.stat.aversion) - fail(); + onStorageInconsistency(); acl_map.removeUsage(node_it->value.acl_id); @@ -365,7 +373,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_i else { // shouldn't be called in any process functions - fail(); + onStorageInconsistency(); } }, delta.operation); @@ -660,8 +668,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr [zxid](const auto & delta) { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); - if (create_delta_it == deltas.end()) - std::terminate(); + assert(create_delta_it != deltas.end()); response.path_created = 
create_delta_it->path; response.error = Coordination::Error::ZOK; @@ -710,7 +717,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce if constexpr (local) response.error = error_code; else - fail(); + onStorageInconsistency(); }; auto & container = storage.container; @@ -854,7 +861,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr if constexpr (local) response.error = error_code; else - fail(); + onStorageInconsistency(); }; auto & container = storage.container; @@ -950,7 +957,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce auto node_it = container.find(request.path); if (node_it == container.end()) - fail(); + onStorageInconsistency(); response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; @@ -1006,7 +1013,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if constexpr (local) response.error = error_code; else - fail(); + onStorageInconsistency(); }; auto & container = storage.container; @@ -1089,7 +1096,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro if constexpr (local) response.error = error_code; else - fail(); + onStorageInconsistency(); }; auto & container = storage.container; @@ -1170,7 +1177,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr auto node_it = storage.container.find(request.path); if (node_it == storage.container.end()) - fail(); + onStorageInconsistency(); response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; @@ -1219,7 +1226,7 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr if constexpr (local) response.error = error_code; else - fail(); + onStorageInconsistency(); }; auto & container = storage.container; diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index cf85c366789..09ca731f21e 100644 --- 
a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -239,7 +239,6 @@ public: /// Global id of all requests applied to storage int64_t zxid{0}; bool finalized{false}; - int64_t last_committed_zxid{0}; /// Currently active watches (node_path -> subscribed sessions) Watches watches; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 236b33d4497..496c932f497 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1262,6 +1262,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint changelog.append(entry); changelog.end_of_append_batch(0, 0); + state_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); state_machine->commit(i, changelog.entry_at(i)->get_buf()); bool snapshot_created = false; if (i % settings->snapshot_distance == 0) @@ -1306,6 +1307,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i) { + restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); restore_machine->commit(i, changelog.entry_at(i)->get_buf()); } @@ -1408,6 +1410,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) request_c->path = "/hello"; request_c->is_ephemeral = true; auto entry_c = getLogEntryFromZKRequest(0, 1, request_c); + state_machine->pre_commit(1, entry_c->get_buf()); state_machine->commit(1, entry_c->get_buf()); const auto & storage = state_machine->getStorage(); @@ -1416,6 +1419,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) request_d->path = "/hello"; /// Delete from other session auto entry_d = getLogEntryFromZKRequest(0, 2, request_d); + state_machine->pre_commit(2, entry_d->get_buf()); state_machine->commit(2, entry_d->get_buf()); EXPECT_EQ(storage.ephemerals.size(), 0); From f3646cea41c723e41d8bf2fb310da499272b448b Mon Sep 17 00:00:00 2001 From: 
Antonio Andelic Date: Wed, 11 May 2022 12:17:29 +0000 Subject: [PATCH 103/615] Rename preprocess test --- src/Coordination/tests/gtest_coordination.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 824d978888a..2742f48f49e 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1790,7 +1790,7 @@ ResponseType getSingleResponse(const auto & responses) return dynamic_cast(*responses[0].response); } -TEST_P(CoordinationTest, TestUncommittedStateBasicCRUD) +TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) { using namespace DB; using namespace Coordination; From 8af9ab3766b6dcdd740b0225ee1d9cc918534982 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 11 May 2022 15:02:23 +0200 Subject: [PATCH 104/615] Fix test --- .../0_stateless/02187_async_inserts_all_formats.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.reference b/tests/queries/0_stateless/02187_async_inserts_all_formats.reference index 92ce8dfd2c7..4f7f50bca13 100644 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats.reference +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.reference @@ -7,6 +7,8 @@ CSVWithNamesAndTypes CustomSeparated CustomSeparatedWithNames CustomSeparatedWithNamesAndTypes +JSONColumns +JSONCompactColumns JSONCompactEachRow JSONCompactEachRowWithNames JSONCompactEachRowWithNamesAndTypes From e65cfaecf899cc768507822dcd8155145a11b97a Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 12 May 2022 00:18:47 +0800 Subject: [PATCH 105/615] remove unused code --- src/Storages/WindowView/StorageWindowView.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 
cf4bdaa7b3d..4d5502db895 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1244,19 +1244,6 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } } -class PushingToWindowViewSink final : public SinkToStorage -{ -public: - PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); - String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk chunk) override; - -private: - StorageWindowView & window_view; - StoragePtr storage_holder; - ContextPtr context; -}; - void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { From 0d1d6fb27b217996530a9e4b4b6438287d654a55 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Thu, 12 May 2022 02:19:05 +0900 Subject: [PATCH 106/615] consolidate hashid test queries and hold out from fasttest --- tests/queries/0_stateless/02293_hashid.reference | 6 ++++++ tests/queries/0_stateless/02293_hashid.sql | 5 +++++ tests/queries/0_stateless/02293_hashid_arguments.reference | 5 ----- tests/queries/0_stateless/02293_hashid_arguments.sql | 1 - tests/queries/0_stateless/02293_hashid_const.reference | 1 - tests/queries/0_stateless/02293_hashid_const.sql | 1 - 6 files changed, 11 insertions(+), 8 deletions(-) delete mode 100644 tests/queries/0_stateless/02293_hashid_arguments.reference delete mode 100644 tests/queries/0_stateless/02293_hashid_arguments.sql delete mode 100644 tests/queries/0_stateless/02293_hashid_const.reference delete mode 100644 tests/queries/0_stateless/02293_hashid_const.sql diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference index 05023857670..9ae4cce3944 100644 --- a/tests/queries/0_stateless/02293_hashid.reference +++ b/tests/queries/0_stateless/02293_hashid.reference @@ -3,3 +3,9 @@ 2 k5 3 l5 4 mO +0 
pbgkmdljlpjoapne +1 akemglnjepjpodba +2 obmgndljgajpkeao +3 dldokmpjpgjgeanb +4 nkdlpgajngjnobme +YQrvD5XGvbx diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql index 51bed96c039..e6ee89e8d1a 100644 --- a/tests/queries/0_stateless/02293_hashid.sql +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -1 +1,6 @@ +-- Tags: no-fasttest + select number, hashid(number) from system.numbers limit 5; +select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; +select hashid(1234567890123456, 's3cr3t'); +select hashid(-1); diff --git a/tests/queries/0_stateless/02293_hashid_arguments.reference b/tests/queries/0_stateless/02293_hashid_arguments.reference deleted file mode 100644 index 41f3b213cdb..00000000000 --- a/tests/queries/0_stateless/02293_hashid_arguments.reference +++ /dev/null @@ -1,5 +0,0 @@ -0 pbgkmdljlpjoapne -1 akemglnjepjpodba -2 obmgndljgajpkeao -3 dldokmpjpgjgeanb -4 nkdlpgajngjnobme diff --git a/tests/queries/0_stateless/02293_hashid_arguments.sql b/tests/queries/0_stateless/02293_hashid_arguments.sql deleted file mode 100644 index f1cb3a144e7..00000000000 --- a/tests/queries/0_stateless/02293_hashid_arguments.sql +++ /dev/null @@ -1 +0,0 @@ -select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; diff --git a/tests/queries/0_stateless/02293_hashid_const.reference b/tests/queries/0_stateless/02293_hashid_const.reference deleted file mode 100644 index 93bd202307e..00000000000 --- a/tests/queries/0_stateless/02293_hashid_const.reference +++ /dev/null @@ -1 +0,0 @@ -YQrvD5XGvbx diff --git a/tests/queries/0_stateless/02293_hashid_const.sql b/tests/queries/0_stateless/02293_hashid_const.sql deleted file mode 100644 index b8308d3f55b..00000000000 --- a/tests/queries/0_stateless/02293_hashid_const.sql +++ /dev/null @@ -1 +0,0 @@ -select hashid(1234567890123456, 's3cr3t'); From 5dcd25be23c60bd7472691818a78ceb87a829b2b Mon Sep 17 00:00:00 2001 From: 
alesapin Date: Thu, 12 May 2022 00:04:54 +0200 Subject: [PATCH 107/615] Initial implementation --- .../registerDiskAzureBlobStorage.cpp | 4 + src/Disks/DiskObjectStorage.cpp | 1228 +++++++++++++++++ src/Disks/DiskObjectStorage.h | 324 +++++ src/Disks/IDisk.h | 3 +- src/Disks/IDiskRemote.h | 19 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 4 +- src/Disks/IObjectStorage.cpp | 37 + src/Disks/IObjectStorage.h | 122 ++ src/Disks/S3/DiskS3.cpp | 1055 -------------- src/Disks/S3/DiskS3.h | 189 --- src/Disks/S3/diskSettings.cpp | 127 ++ src/Disks/S3/diskSettings.h | 29 + src/Disks/S3/parseConfig.h | 30 + src/Disks/S3/registerDiskS3.cpp | 166 +-- src/Disks/S3ObjectStorage.cpp | 436 ++++++ src/Disks/S3ObjectStorage.h | 130 ++ src/IO/ReadBufferFromS3.cpp | 2 +- src/IO/ReadBufferFromS3.h | 8 +- src/IO/S3Common.cpp | 6 +- src/IO/S3Common.h | 4 +- src/IO/WriteBufferFromS3.cpp | 4 +- src/IO/WriteBufferFromS3.h | 7 +- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 3 +- src/Storages/StorageS3.cpp | 9 +- src/Storages/StorageS3.h | 8 +- tests/integration/test_merge_tree_s3/test.py | 5 +- 29 files changed, 2530 insertions(+), 1435 deletions(-) create mode 100644 src/Disks/DiskObjectStorage.cpp create mode 100644 src/Disks/DiskObjectStorage.h create mode 100644 src/Disks/IObjectStorage.cpp create mode 100644 src/Disks/IObjectStorage.h delete mode 100644 src/Disks/S3/DiskS3.cpp delete mode 100644 src/Disks/S3/DiskS3.h create mode 100644 src/Disks/S3/diskSettings.cpp create mode 100644 src/Disks/S3/diskSettings.h create mode 100644 src/Disks/S3/parseConfig.h create mode 100644 src/Disks/S3ObjectStorage.cpp create mode 100644 src/Disks/S3ObjectStorage.h diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 128c7534b3c..8b2429263bb 100644 --- 
a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -19,6 +19,9 @@ namespace ErrorCodes extern const int PATH_ACCESS_DENIED; } +namespace +{ + constexpr char test_file[] = "test.txt"; constexpr char test_str[] = "test"; constexpr size_t test_str_size = 4; @@ -71,6 +74,7 @@ std::unique_ptr getSettings(const Poco::Util::Abst ); } +} void registerDiskAzureBlobStorage(DiskFactory & factory) { diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp new file mode 100644 index 00000000000..8fbde6dc6ca --- /dev/null +++ b/src/Disks/DiskObjectStorage.cpp @@ -0,0 +1,1228 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DISK_INDEX; + extern const int UNKNOWN_FORMAT; + extern const int FILE_ALREADY_EXISTS; + extern const int PATH_ACCESS_DENIED;; + extern const int FILE_DOESNT_EXIST; + extern const int BAD_FILE_TYPE; + extern const int MEMORY_LIMIT_EXCEEDED; + extern const int SUPPORT_IS_DISABLED; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + return result; +} + + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.save(sync); + return result; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, 
DiskObjectStorage::MetadataUpdater updater) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + return result; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + updater(result); + result.save(sync); + return result; +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + metadata_disk_->removeFile(metadata_file_path_); + + return result; + +} + +DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) +{ + if (overwrite || !metadata_disk_->exists(metadata_file_path_)) + { + return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); + } + else + { + auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + if (result.read_only) + throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); + return result; + } +} + +void DiskObjectStorage::Metadata::load() +{ + try + { + const ReadSettings read_settings; + auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ + + UInt32 version; + readIntText(version, *buf); + + if (version < VERSION_ABSOLUTE_PATHS || 
version > VERSION_READ_ONLY_FLAG) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT, + "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", + metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); + + assertChar('\n', *buf); + + UInt32 remote_fs_objects_count; + readIntText(remote_fs_objects_count, *buf); + assertChar('\t', *buf); + readIntText(total_size, *buf); + assertChar('\n', *buf); + remote_fs_objects.resize(remote_fs_objects_count); + + for (size_t i = 0; i < remote_fs_objects_count; ++i) + { + String remote_fs_object_path; + size_t remote_fs_object_size; + readIntText(remote_fs_object_size, *buf); + assertChar('\t', *buf); + readEscapedString(remote_fs_object_path, *buf); + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!remote_fs_object_path.starts_with(remote_fs_root_path)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, + "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", + remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); + + remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); + } + assertChar('\n', *buf); + remote_fs_objects[i].relative_path = remote_fs_object_path; + remote_fs_objects[i].bytes_size = remote_fs_object_size; + } + + readIntText(ref_count, *buf); + assertChar('\n', *buf); + + if (version >= VERSION_READ_ONLY_FLAG) + { + readBoolText(read_only, *buf); + assertChar('\n', *buf); + } + } + catch (Exception & e) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) + throw; + + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + throw; + + throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT); + } +} + +/// Construct empty in-memory metadata for the given metadata file path; nothing is read from disk here (use load() for that). 
+DiskObjectStorage::Metadata::Metadata(
+    const String & remote_fs_root_path_,
+    DiskPtr metadata_disk_,
+    const String & metadata_file_path_)
+    : remote_fs_root_path(remote_fs_root_path_)
+    , metadata_file_path(metadata_file_path_)
+    , metadata_disk(metadata_disk_)
+    , total_size(0)
+    , ref_count(0)
+{
+    /// Intentionally empty: the constructor only stores its arguments and zeroes the counters.
+}
+
+/// Register one more remote object: grow the total size and append (path, size) to the object list.
+void DiskObjectStorage::Metadata::addObject(const String & path, size_t size)
+{
+    total_size += size;
+    remote_fs_objects.emplace_back(path, size);
+}
+
+
+/// Write the metadata to `buf` as text, finalize the buffer and, when `sync` is set, sync it.
+/// Layout, in the order it is written:
+///     <version>\n
+///     <objects count>\t<total size>\n
+///     <object size>\t<escaped object path>\n      -- one line per object
+///     <ref count>\n
+///     <read_only flag>\n
+/// NOTE(review): the version written is VERSION_RELATIVE_PATHS, yet the read_only line is always emitted,
+/// while load() parses that line only when version >= VERSION_READ_ONLY_FLAG -- confirm this is intended.
+void DiskObjectStorage::Metadata::saveToBuffer(WriteBuffer & buf, bool sync)
+{
+    /// Header: format version, then objects count and total size.
+    writeIntText(VERSION_RELATIVE_PATHS, buf);
+    writeChar('\n', buf);
+    writeIntText(remote_fs_objects.size(), buf);
+    writeChar('\t', buf);
+    writeIntText(total_size, buf);
+    writeChar('\n', buf);
+
+    /// One "<size>\t<path>" line per remote object.
+    for (const auto & [object_path, object_size] : remote_fs_objects)
+    {
+        writeIntText(object_size, buf);
+        writeChar('\t', buf);
+        writeEscapedString(object_path, buf);
+        writeChar('\n', buf);
+    }
+
+    /// Trailer: reference counter and read-only flag, each on its own line.
+    writeIntText(ref_count, buf);
+    writeChar('\n', buf);
+    writeBoolText(read_only, buf);
+    writeChar('\n', buf);
+
+    buf.finalize();
+    if (sync)
+    {
+        buf.sync();
+    }
+}
+
+/// Fsync metadata file if 'sync' flag is set.
+void DiskObjectStorage::Metadata::save(bool sync) +{ + auto buf = metadata_disk->writeFile(metadata_file_path, 1024); + saveToBuffer(*buf, sync); +} + +std::string DiskObjectStorage::Metadata::serializeToString() +{ + WriteBufferFromOwnString write_buf; + saveToBuffer(write_buf, false); + return write_buf.str(); +} + +DiskObjectStorage::Metadata DiskObjectStorage::readMetadataUnlocked(const String & path, std::shared_lock &) const +{ + return Metadata::readMetadata(remote_fs_root_path, metadata_disk, path); +} + + +DiskObjectStorage::Metadata DiskObjectStorage::readMetadata(const String & path) const +{ + std::shared_lock lock(metadata_mutex); + return readMetadataUnlocked(path, lock); +} + +DiskObjectStorage::Metadata DiskObjectStorage::readUpdateAndStoreMetadata(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + std::unique_lock lock(metadata_mutex); + return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); +} + + +DiskObjectStorage::Metadata DiskObjectStorage::readUpdateStoreMetadataAndRemove(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + std::unique_lock lock(metadata_mutex); + return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater); +} + +DiskObjectStorage::Metadata DiskObjectStorage::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + if (mode == WriteMode::Rewrite || !metadata_disk->exists(path)) + { + std::unique_lock lock(metadata_mutex); + return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); + } + else + { + return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); + } +} + +DiskObjectStorage::Metadata DiskObjectStorage::createAndStoreMetadata(const String & path, bool sync) +{ + return 
Metadata::createAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync); +} + +DiskObjectStorage::Metadata DiskObjectStorage::createUpdateAndStoreMetadata(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater) +{ + return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); +} + +std::vector DiskObjectStorage::getRemotePaths(const String & local_path) const +{ + auto metadata = readMetadata(local_path); + + std::vector remote_paths; + for (const auto & [remote_path, _] : metadata.remote_fs_objects) + remote_paths.push_back(fs::path(metadata.remote_fs_root_path) / remote_path); + + return remote_paths; + +} + +void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) +{ + /// Protect against concurrent deletion of files (for example because of a merge). + if (metadata_disk->isFile(local_path)) + { + try + { + paths_map.emplace_back(local_path, getRemotePaths(local_path)); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) + return; + throw; + } + } + else + { + DiskDirectoryIteratorPtr it; + try + { + it = iterateDirectory(local_path); + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) + return; + throw; + } + + for (; it->isValid(); it->next()) + DiskObjectStorage::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); + } +} + +bool DiskObjectStorage::exists(const String & path) const +{ + return metadata_disk->exists(path); +} + + +bool DiskObjectStorage::isFile(const String & path) const +{ + return metadata_disk->isFile(path); +} + + +void DiskObjectStorage::createFile(const String & path) +{ + createAndStoreMetadata(path, false); +} + +size_t DiskObjectStorage::getFileSize(const String & path) const +{ + return readMetadata(path).total_size; +} + +void DiskObjectStorage::moveFile(const String & from_path, const String & to_path) +{ + if 
(exists(to_path)) + throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); + + metadata_disk->moveFile(from_path, to_path); +} + +void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path) +{ + if (exists(to_path)) + { + const String tmp_path = to_path + ".old"; + moveFile(to_path, tmp_path); + moveFile(from_path, to_path); + removeFile(tmp_path); + } + else + moveFile(from_path, to_path); +} + +void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) +{ + std::vector paths_to_remove; + removeMetadata(path, paths_to_remove); + + if (!delete_metadata_only) + removeFromRemoteFS(paths_to_remove); +} + +void DiskObjectStorage::removeFromRemoteFS(const std::vector & paths) +{ + object_storage->removeObjects(paths); +} + +UInt32 DiskObjectStorage::getRefCount(const String & path) const +{ + return readMetadata(path).ref_count; +} + +std::unordered_map DiskObjectStorage::getSerializedMetadata(const std::vector & file_paths) const +{ + std::unordered_map metadatas; + + std::shared_lock lock(metadata_mutex); + + for (const auto & path : file_paths) + { + DiskObjectStorage::Metadata metadata = readMetadataUnlocked(path, lock); + metadata.ref_count = 0; + metadatas[path] = metadata.serializeToString(); + } + + return metadatas; +} + +String DiskObjectStorage::getUniqueId(const String & path) const +{ + LOG_TRACE(log, "Remote path: {}, Path: {}", remote_fs_root_path, path); + auto metadata = readMetadata(path); + String id; + if (!metadata.remote_fs_objects.empty()) + id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].relative_path; + return id; +} + +bool DiskObjectStorage::checkObjectExists(const String & path) const +{ + return object_storage->exists(path); +} + +bool DiskObjectStorage::checkUniqueId(const String & id) const +{ + return checkObjectExists(id); +} + +void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path) +{ + 
readUpdateAndStoreMetadata(src_path, false, [](Metadata & metadata) { metadata.ref_count++; return true; }); + + /// Create FS hardlink to metadata file. + metadata_disk->createHardLink(src_path, dst_path); + +} + +void DiskObjectStorage::setReadOnly(const String & path) +{ + /// We should store read only flag inside metadata file (instead of using FS flag), + /// because we modify metadata file when create hard-links from it. + readUpdateAndStoreMetadata(path, false, [](Metadata & metadata) { metadata.read_only = true; return true; }); +} + + +bool DiskObjectStorage::isDirectory(const String & path) const +{ + return metadata_disk->isDirectory(path); +} + + +void DiskObjectStorage::createDirectory(const String & path) +{ + metadata_disk->createDirectory(path); +} + + +void DiskObjectStorage::createDirectories(const String & path) +{ + metadata_disk->createDirectories(path); +} + + +void DiskObjectStorage::clearDirectory(const String & path) +{ + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + if (isFile(it->path())) + removeFile(it->path()); +} + + +void DiskObjectStorage::removeDirectory(const String & path) +{ + metadata_disk->removeDirectory(path); +} + + +DiskDirectoryIteratorPtr DiskObjectStorage::iterateDirectory(const String & path) +{ + return metadata_disk->iterateDirectory(path); +} + + +void DiskObjectStorage::listFiles(const String & path, std::vector & file_names) +{ + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + file_names.push_back(it->name()); +} + + +void DiskObjectStorage::setLastModified(const String & path, const Poco::Timestamp & timestamp) +{ + metadata_disk->setLastModified(path, timestamp); +} + + +Poco::Timestamp DiskObjectStorage::getLastModified(const String & path) +{ + return metadata_disk->getLastModified(path); +} + +void DiskObjectStorage::removeMetadata(const String & path, std::vector & paths_to_remove) +{ + LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + 
path)); + + if (!metadata_disk->exists(path)) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path); + + if (!metadata_disk->isFile(path)) + throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path); + + try + { + auto metadata_updater = [&paths_to_remove, this] (Metadata & metadata) + { + if (metadata.ref_count == 0) + { + for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) + { + paths_to_remove.push_back(fs::path(remote_fs_root_path) / remote_fs_object_path); + object_storage->removeFromCache(fs::path(remote_fs_root_path) / remote_fs_object_path); + } + + return false; + } + else /// In other case decrement number of references, save metadata and delete hardlink. + { + --metadata.ref_count; + } + + return true; + }; + + readUpdateStoreMetadataAndRemove(path, false, metadata_updater); + /// If there is no references - delete content from remote FS. + } + catch (const Exception & e) + { + /// If it's impossible to read meta - just remove it from FS. + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) + { + LOG_WARNING(log, + "Metadata file {} can't be read by reason: {}. Removing it forcibly.", + backQuote(path), e.nested() ? e.nested()->message() : e.message()); + metadata_disk->removeFile(path); + } + else + throw; + } +} + + +void DiskObjectStorage::removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove) +{ + checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. 
+ + if (metadata_disk->isFile(path)) + { + removeMetadata(path, paths_to_remove[path]); + } + else + { + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + removeMetadataRecursive(it->path(), paths_to_remove); + + metadata_disk->removeDirectory(path); + } +} + + +void DiskObjectStorage::shutdown() +{ + object_storage->shutdown(); +} + +void DiskObjectStorage::startup() +{ + + LOG_INFO(log, "Starting up disk {}", name); + object_storage->startup(); + + if (send_metadata) + { + metadata_helper->restore(); + + if (metadata_helper->readSchemaVersion(remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) + metadata_helper->migrateToRestorableSchema(); + + metadata_helper->findLastRevision(); + } + + LOG_INFO(log, "Disk {} started up", name); +} + +ReservationPtr DiskObjectStorage::reserve(UInt64 bytes) +{ + if (!tryReserve(bytes)) + return {}; + + return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); +} + +void DiskObjectStorage::removeSharedFileIfExists(const String & path, bool delete_metadata_only) +{ + std::vector paths_to_remove; + if (metadata_disk->exists(path)) + { + removeMetadata(path, paths_to_remove); + if (!delete_metadata_only) + removeFromRemoteFS(paths_to_remove); + } +} + +void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) +{ + std::unordered_map> paths_to_remove; + removeMetadataRecursive(path, paths_to_remove); + + if (!keep_all_batch_data) + { + std::vector remove_from_remote; + for (auto && [local_path, remote_paths] : paths_to_remove) + { + if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) + remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); + } + removeFromRemoteFS(remove_from_remote); + } +} + +bool DiskObjectStorage::tryReserve(UInt64 bytes) +{ + std::lock_guard lock(reservation_mutex); + if (bytes == 0) + { + 
LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); + ++reservation_count; + return true; + } + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (unreserved_space >= bytes) + { + LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", + ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); + ++reservation_count; + reserved_bytes += bytes; + return true; + } + return false; +} + +std::unique_ptr DiskObjectStorage::readFile( + const String & path, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size) const +{ + auto metadata = readMetadata(path); + return object_storage->readObjects(remote_fs_root_path, metadata.remote_fs_objects, settings, read_hint, file_size); +} + +std::unique_ptr DiskObjectStorage::writeFile( + const String & path, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) +{ + auto blob_name = getRandomASCIIString(); + + auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) + { + readOrCreateUpdateAndStoreMetadata(path, mode, false, + [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); + }; + + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, {}, create_metadata_callback, buf_size, settings); +} + + +void DiskObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) +{ + object_storage->applyNewSettings(config, "storage_configuration.disks." 
+ name, context_); +} + +DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const +{ + if (i != 0) + throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); + return disk; +} + +void DiskObjectStorageReservation::update(UInt64 new_size) +{ + std::lock_guard lock(disk->reservation_mutex); + disk->reserved_bytes -= size; + size = new_size; + disk->reserved_bytes += size; +} + +DiskObjectStorageReservation::~DiskObjectStorageReservation() +{ + try + { + std::lock_guard lock(disk->reservation_mutex); + if (disk->reserved_bytes < size) + { + disk->reserved_bytes = 0; + LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName()); + } + else + { + disk->reserved_bytes -= size; + } + + if (disk->reservation_count == 0) + LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName()); + else + --disk->reservation_count; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +static String revisionToString(UInt64 revision) +{ + return std::bitset<64>(revision).to_string(); +} + +void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const +{ + const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; + auto buf = disk->object_storage->writeObject(path, metadata); + buf->write('0'); + buf->finalize(); +} + +void DiskObjectStorageMetadataHelper::findLastRevision() +{ + /// Construct revision number from high to low bits. + String revision; + revision.reserve(64); + for (int bit = 0; bit < 64; ++bit) + { + auto revision_prefix = revision + "1"; + + LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); + + /// Check file or operation with such revision prefix exists. 
+ if (disk->object_storage->exists(disk->remote_fs_root_path + "r" + revision_prefix) + || disk->object_storage->exists(disk->remote_fs_root_path + "operations/r" + revision_prefix)) + revision += "1"; + else + revision += "0"; + } + revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); + LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); +} + +int DiskObjectStorageMetadataHelper::readSchemaVersion(const String & source_path) const +{ + const std::string path = source_path + SCHEMA_VERSION_OBJECT; + int version = 0; + if (!disk->object_storage->exists(path)) + return version; + + auto buf = disk->object_storage->readObject(path); + readIntText(version, *buf); + + return version; +} + +void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) const +{ + auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; + + auto buf = disk->object_storage->writeObject(path); + writeIntText(version, *buf); + buf->finalize(); + +} + +void DiskObjectStorageMetadataHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const +{ + disk->object_storage->copyObject(key, key, metadata); +} + +void DiskObjectStorageMetadataHelper::migrateFileToRestorableSchema(const String & path) const +{ + LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_disk->getPath() + path); + + auto meta = disk->readMetadata(path); + + for (const auto & [key, _] : meta.remote_fs_objects) + { + ObjectAttributes metadata { + {"path", path} + }; + updateObjectMetadata(disk->remote_fs_root_path + key, metadata); + } +} +void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) +{ + checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. 
+ + LOG_TRACE(disk->log, "Migrate directory {} to restorable schema", disk->metadata_disk->getPath() + path); + + bool dir_contains_only_files = true; + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + if (disk->isDirectory(it->path())) + { + dir_contains_only_files = false; + break; + } + + /// The whole directory can be migrated asynchronously. + if (dir_contains_only_files) + { + auto result = disk->getExecutor().execute([this, path] + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + migrateFileToRestorableSchema(it->path()); + }); + + results.push_back(std::move(result)); + } + else + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + if (!disk->isDirectory(it->path())) + { + auto source_path = it->path(); + auto result = disk->getExecutor().execute([this, source_path] + { + migrateFileToRestorableSchema(source_path); + }); + + results.push_back(std::move(result)); + } + else + migrateToRestorableSchemaRecursive(it->path(), results); + } + +} + +void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() +{ + try + { + LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); + + Futures results; + + for (const auto & root : data_roots) + if (disk->exists(root)) + migrateToRestorableSchemaRecursive(root + '/', results); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to migrate to restorable schema for disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::restore() +{ + if (!disk->exists(RESTORE_FILE_NAME)) + return; + + try + { + RestoreInformation information; + information.source_path = disk->remote_fs_root_path; + + readRestoreInformation(information); + if (information.revision == 0) + information.revision = LATEST_REVISION; + 
if (!information.source_path.ends_with('/')) + information.source_path += '/'; + + /// In this case we need to additionally cleanup S3 from objects with later revision. + /// Will be simply just restore to different path. + if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) + throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); + + /// This case complicates S3 cleanup in case of unsuccessful restore. + if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) + throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + + LOG_INFO(disk->log, "Starting to restore disk {}. Revision: {}, Source path: {}", + disk->name, information.revision, information.source_path); + + if (readSchemaVersion(information.source_path) < RESTORABLE_SCHEMA_VERSION) + throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); + + LOG_INFO(disk->log, "Removing old metadata..."); + + bool cleanup_s3 = information.source_path != disk->remote_fs_root_path; + for (const auto & root : data_roots) + if (disk->exists(root)) + disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); + + restoreFiles(information); + restoreFileOperations(information); + + disk->metadata_disk->removeFile(RESTORE_FILE_NAME); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + + LOG_INFO(disk->log, "Restore disk {} finished", disk->name); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to restore disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) +{ + auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); + buffer->next(); + + try + { + 
std::map properties; + + while (buffer->hasPendingData()) + { + String property; + readText(property, *buffer); + assertChar('\n', *buffer); + + auto pos = property.find('='); + if (pos == std::string::npos || pos == 0 || pos == property.length()) + throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); + + auto key = property.substr(0, pos); + auto value = property.substr(pos + 1); + + auto it = properties.find(key); + if (it != properties.end()) + throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + + properties[key] = value; + } + + for (const auto & [key, value] : properties) + { + ReadBufferFromString value_buffer (value); + + if (key == "revision") + readIntText(restore_information.revision, value_buffer); + else if (key == "source_path") + readText(restore_information.source_path, value_buffer); + else if (key == "detached") + readBoolTextWord(restore_information.detached, value_buffer); + else + throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + } + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, "Failed to read restore information"); + throw; + } +} + +static String shrinkKey(const String & path, const String & key) +{ + if (!key.starts_with(path)) + throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); + + return key.substr(path.length()); +} + +static std::tuple extractRevisionAndOperationFromKey(const String & key) +{ + String revision_str; + String operation; + /// Key has format: ../../r{revision}-{operation} + static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + + re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + + return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; +} + +void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & restore_information) +{ + LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); + + std::vector> results; + auto restore_files = [this, &restore_information, &results](const BlobsPathToSize & keys) + { + std::vector keys_names; + for (const auto & [key, size] : keys) + { + /// Skip file operations objects. They will be processed separately. + if (key.find("/operations/") != String::npos) + continue; + + const auto [revision, _] = extractRevisionAndOperationFromKey(key); + /// Filter early if it's possible to get revision from key. + if (revision > restore_information.revision) + continue; + + keys_names.push_back(key); + } + + if (!keys_names.empty()) + { + auto result = disk->getExecutor().execute([this, &restore_information, keys_names]() + { + processRestoreFiles(restore_information.source_path, keys_names); + }); + + results.push_back(std::move(result)); + } + + return true; + }; + + BlobsPathToSize children; + disk->object_storage->listPrefix(restore_information.source_path, children); + restore_files(children); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); + +} + +void DiskObjectStorageMetadataHelper::processRestoreFiles(const String & source_path, std::vector keys) +{ + for (const auto & key : keys) + { + auto meta = disk->object_storage->getObjectMetadata(key); + auto object_attributes = meta.attributes; + + String path; + if (object_attributes.has_value()) + { + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes->find("path"); + if (path_entry == object_attributes->end()) + { + /// Such keys can remain after migration, we can skip them. 
+ LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); + continue; + } + + path = path_entry->second; + } + else + continue; + + + disk->createDirectories(directoryPath(path)); + auto relative_key = shrinkKey(source_path, key); + + /// Copy object if we restore to different bucket / path. + if (disk->remote_fs_root_path != source_path) + disk->object_storage->copyObject(key, disk->remote_fs_root_path + relative_key); + + auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) + { + metadata.addObject(relative_key, meta.size_bytes); + return true; + }; + + disk->createUpdateAndStoreMetadata(path, false, updater); + + LOG_TRACE(disk->log, "Restored file {}", path); + } + +} + +static String pathToDetached(const String & source_path) +{ + if (source_path.ends_with('/')) + return fs::path(source_path).parent_path().parent_path() / "detached/"; + return fs::path(source_path).parent_path() / "detached/"; +} + +void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInformation & restore_information) +{ + /// Enable recording file operations if we restore to different bucket / path. + bool send_metadata = disk->remote_fs_root_path != restore_information.source_path; + + std::set renames; + auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + { + const String rename = "rename"; + const String hardlink = "hardlink"; + + for (const auto & [key, _]: keys) + { + const auto [revision, operation] = extractRevisionAndOperationFromKey(key); + if (revision == UNKNOWN_REVISION) + { + LOG_WARNING(disk->log, "Skip key {} with unknown revision", key); + continue; + } + + /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). + /// We can stop processing if revision of the object is already more than required. 
+ if (revision > restore_information.revision) + return false; + + /// Keep original revision if restore to different bucket / path. + if (send_metadata) + revision_counter = revision - 1; + + auto object_attributes = *(disk->object_storage->getObjectMetadata(key).attributes); + if (operation == rename) + { + auto from_path = object_attributes["from_path"]; + auto to_path = object_attributes["to_path"]; + if (disk->exists(from_path)) + { + disk->moveFile(from_path, to_path); + if (send_metadata) + { + auto next_revision = ++revision_counter; + const ObjectAttributes object_metadata { + {"from_path", from_path}, + {"to_path", to_path} + }; + createFileOperationObject("rename", next_revision, object_metadata); + } + + LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); + + if (restore_information.detached && disk->isDirectory(to_path)) + { + /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. + if (!from_path.ends_with('/')) + from_path += '/'; + if (!to_path.ends_with('/')) + to_path += '/'; + + /// Always keep latest actual directory path to avoid 'detaching' not existing paths. + auto it = renames.find(from_path); + if (it != renames.end()) + renames.erase(it); + + renames.insert(to_path); + } + } + } + else if (operation == hardlink) + { + auto src_path = object_attributes["src_path"]; + auto dst_path = object_attributes["dst_path"]; + if (disk->exists(src_path)) + { + disk->createDirectories(directoryPath(dst_path)); + if (send_metadata && !dst_path.starts_with("shadow/")) + { + auto next_revision = ++revision_counter; + const ObjectAttributes object_metadata { + {"src_path", src_path}, + {"dst_path", dst_path} + }; + createFileOperationObject("hardlink", next_revision, object_metadata); + } + disk->createHardLink(src_path, dst_path); + LOG_TRACE(disk->log, "Revision {}. 
Restored hardlink {} -> {}", revision, src_path, dst_path); + } + } + } + + return true; + }; + + BlobsPathToSize children; + disk->object_storage->listPrefix(restore_information.source_path + "operations/", children); + restore_file_operations(children); + + if (restore_information.detached) + { + Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; + + for (const auto & path : renames) + { + /// Skip already detached parts. + if (path.find("/detached/") != std::string::npos) + continue; + + /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. + fs::path directory_path(path); + auto directory_name = directory_path.parent_path().filename().string(); + + auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; + if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) + continue; + + auto detached_path = pathToDetached(path); + + LOG_TRACE(disk->log, "Move directory to 'detached' {} -> {}", path, detached_path); + + fs::path from_path = fs::path(path); + fs::path to_path = fs::path(detached_path); + if (path.ends_with('/')) + to_path /= from_path.parent_path().filename(); + else + to_path /= from_path.filename(); + + /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename + if (disk->metadata_disk->exists(to_path)) + disk->metadata_disk->removeRecursive(to_path); + + disk->createDirectories(directoryPath(to_path)); + disk->metadata_disk->moveDirectory(from_path, to_path); + } + } + + LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); +} + + +} diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h new file mode 100644 index 00000000000..2147f9527d5 --- /dev/null +++ b/src/Disks/DiskObjectStorage.h @@ -0,0 +1,324 @@ +#pragma once + +#include +#include +#include + +namespace CurrentMetrics +{ + extern const 
Metric DiskSpaceReservedForMerge; +} + +namespace DB +{ + +class DiskObjectStorageMetadataHelper; + +class DiskObjectStorage : public IDisk +{ + +friend class DiskObjectStorageReservation; +friend class DiskObjectStorageMetadataHelper; + +public: + DiskObjectStorage( + const String & name_, + const String & remote_fs_root_path_, + const String & log_name, + DiskPtr metadata_disk_, + ObjectStoragePtr && object_storage_, + DiskType disk_type_, + bool send_metadata_) + : name(name_) + , remote_fs_root_path(remote_fs_root_path_) + , log (&Poco::Logger::get(log_name)) + , metadata_disk(metadata_disk_) + , disk_type(disk_type_) + , object_storage(std::move(object_storage_)) + , send_metadata(send_metadata_) + , metadata_helper(std::make_unique(this, ReadSettings{})) + {} + + DiskType getType() const override { return disk_type; } + + bool supportZeroCopyReplication() const override { return true; } + + bool supportParallelWrite() const override { return true; } + + struct Metadata; + using MetadataUpdater = std::function; + + const String & getName() const final override { return name; } + + const String & getPath() const final override { return metadata_disk->getPath(); } + + std::vector getRemotePaths(const String & local_path) const final override; + + void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; + + /// Methods for working with metadata. For some operations (like hardlink + /// creation) metadata can be updated concurrently from multiple threads + /// (file actually rewritten on disk). So additional RW lock is required for + /// metadata read and write, but not for create new metadata. 
+ Metadata readMetadata(const String & path) const; + Metadata readMetadataUnlocked(const String & path, std::shared_lock &) const; + Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater); + Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater); + + Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater); + + Metadata createAndStoreMetadata(const String & path, bool sync); + Metadata createUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater); + + UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } + + UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } + + UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } + + UInt64 getKeepingFreeSpace() const override { return 0; } + + bool exists(const String & path) const override; + + bool isFile(const String & path) const override; + + void createFile(const String & path) override; + + size_t getFileSize(const String & path) const override; + + void moveFile(const String & from_path, const String & to_path) override; + + void replaceFile(const String & from_path, const String & to_path) override; + + void removeFile(const String & path) override { removeSharedFile(path, false); } + + void removeFileIfExists(const String & path) override { removeSharedFileIfExists(path, false); } + + void removeRecursive(const String & path) override { removeSharedRecursive(path, false, {}); } + + void removeSharedFile(const String & path, bool delete_metadata_only) override; + + void removeSharedFileIfExists(const String & path, bool delete_metadata_only) override; + + void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; + + void removeFromRemoteFS(const std::vector & paths); + + DiskPtr 
getMetadataDiskIfExistsOrSelf() override { return metadata_disk; } + + UInt32 getRefCount(const String & path) const override; + + /// Return metadata for each file path. Also, before serialization reset + /// ref_count for each metadata to zero. This function used only for remote + /// fetches/sends in replicated engines. That's why we reset ref_count to zero. + std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; + + String getUniqueId(const String & path) const override; + + bool checkObjectExists(const String & path) const; + bool checkUniqueId(const String & id) const override; + + void createHardLink(const String & src_path, const String & dst_path) override; + + void listFiles(const String & path, std::vector & file_names) override; + + void setReadOnly(const String & path) override; + + bool isDirectory(const String & path) const override; + + void createDirectory(const String & path) override; + + void createDirectories(const String & path) override; + + void clearDirectory(const String & path) override; + + void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } + + void removeDirectory(const String & path) override; + + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; + + Poco::Timestamp getLastModified(const String & path) override; + + bool isRemote() const override { return true; } + + void shutdown() override; + + void startup() override; + + ReservationPtr reserve(UInt64 bytes) override; + + std::unique_ptr readFile( + const String & path, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size) const override; + + std::unique_ptr writeFile( + const String & path, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) override; + + void applyNewSettings(const Poco::Util::AbstractConfiguration & 
config, ContextPtr context_, const String &, const DisksMap &) override; + +private: + const String name; + const String remote_fs_root_path; + Poco::Logger * log; + DiskPtr metadata_disk; + + const DiskType disk_type; + ObjectStoragePtr object_storage; + + UInt64 reserved_bytes = 0; + UInt64 reservation_count = 0; + std::mutex reservation_mutex; + + mutable std::shared_mutex metadata_mutex; + void removeMetadata(const String & path, std::vector & paths_to_remove); + + void removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove); + + bool tryReserve(UInt64 bytes); + + bool send_metadata; + + std::unique_ptr metadata_helper; +}; + +struct DiskObjectStorage::Metadata +{ + using Updater = std::function; + /// Metadata file version. + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; + static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; + + /// Remote FS objects paths and their sizes. + std::vector remote_fs_objects; + + /// URI + const String & remote_fs_root_path; + + /// Relative path to metadata file on local FS. + const String metadata_file_path; + + DiskPtr metadata_disk; + + /// Total size of all remote FS (S3, HDFS) objects. + size_t total_size = 0; + + /// Number of references (hardlinks) to this metadata file. + /// + /// FIXME: Why we are tracking it explicitly, without + /// info from filesystem???? + UInt32 ref_count = 0; + + /// Flag indicates that file is read only. 
+ bool read_only = false; + + Metadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_); + + void addObject(const String & path, size_t size); + + static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); + static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + + static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); + static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static Metadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); + + /// Serialize metadata to string (very same with saveToBuffer) + std::string serializeToString(); + +private: + /// Fsync metadata file if 'sync' flag is set. 
+ void save(bool sync = false); + void saveToBuffer(WriteBuffer & buffer, bool sync); + void load(); +}; + +class DiskObjectStorageReservation final : public IReservation +{ +public: + DiskObjectStorageReservation(const std::shared_ptr & disk_, UInt64 size_) + : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + { + } + + UInt64 getSize() const override { return size; } + + DiskPtr getDisk(size_t i) const override; + + Disks getDisks() const override { return {disk}; } + + void update(UInt64 new_size) override; + + ~DiskObjectStorageReservation() override; + +private: + std::shared_ptr disk; + UInt64 size; + CurrentMetrics::Increment metric_increment; +}; + +class DiskObjectStorageMetadataHelper +{ +public: + static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); + static constexpr UInt64 UNKNOWN_REVISION = 0; + + DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) + : disk(disk_) + , read_settings(std::move(read_settings_)) + { + } + + struct RestoreInformation + { + UInt64 revision = LATEST_REVISION; + String source_path; + bool detached = false; + }; + + using Futures = std::vector>; + + void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + void findLastRevision(); + + int readSchemaVersion(const String & source_path) const; + void saveSchemaVersion(const int & version) const; + void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; + void migrateFileToRestorableSchema(const String & path) const; + void migrateToRestorableSchemaRecursive(const String & path, Futures & results); + void migrateToRestorableSchema(); + + void restore(); + void readRestoreInformation(RestoreInformation & restore_information); + void restoreFiles(const RestoreInformation & restore_information); + void processRestoreFiles(const String & source_path, std::vector keys); + void 
restoreFileOperations(const RestoreInformation & restore_information); + + std::atomic revision_counter = 0; + inline static const String RESTORE_FILE_NAME = "restore"; + + /// Object contains information about schema version. + inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; + /// Version with possibility to backup-restore metadata. + static constexpr int RESTORABLE_SCHEMA_VERSION = 1; + /// Directories with data. + const std::vector data_roots {"data", "store"}; + + DiskObjectStorage * disk; + + ReadSettings read_settings; +}; + +} diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index c4578d51b6e..1071e1294b6 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -289,7 +290,7 @@ public: virtual bool isReadOnly() const { return false; } - /// Check if disk is broken. Broken disks will have 0 space and not be used. + /// Check if disk is broken. Broken disks will have 0 space and cannot be used. virtual bool isBroken() const { return false; } /// Invoked when Global Context is shutdown. 
diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 65bcdf3e719..327452c0bbf 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -21,24 +21,6 @@ namespace CurrentMetrics namespace DB { -/// Path to blob with it's size -struct BlobPathWithSize -{ - std::string relative_path; - uint64_t bytes_size; - - BlobPathWithSize() = default; - BlobPathWithSize(const BlobPathWithSize & other) = default; - - BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_) - : relative_path(relative_path_) - , bytes_size(bytes_size_) - {} -}; - -/// List of blobs with their sizes -using BlobsPathToSize = std::vector; - class IAsynchronousReader; using AsynchronousReaderPtr = std::shared_ptr; @@ -153,6 +135,7 @@ public: virtual void removeFromRemoteFS(const std::vector & paths) = 0; static AsynchronousReaderPtr getThreadPoolReader(); + static ThreadPool & getThreadPoolWriter(); DiskPtr getMetadataDiskIfExistsOrSelf() override { return metadata_disk; } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index da2d1dee4b2..f0beaab67cf 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -98,7 +98,7 @@ class ReadBufferFromS3Gather final : public ReadBufferFromRemoteFSGather { public: ReadBufferFromS3Gather( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & version_id_, const std::string & common_path_prefix_, @@ -116,7 +116,7 @@ public: SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; String bucket; String version_id; UInt64 max_single_read_retries; diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp new file mode 100644 index 00000000000..ac8f3fc39e8 --- /dev/null +++ b/src/Disks/IObjectStorage.cpp @@ -0,0 +1,37 @@ +#include +#include + 
+namespace DB +{ +AsynchronousReaderPtr IObjectStorage::getThreadPoolReader() +{ + constexpr size_t pool_size = 50; + constexpr size_t queue_size = 1000000; + static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size); + return reader; +} + +ThreadPool & IObjectStorage::getThreadPoolWriter() +{ + constexpr size_t pool_size = 100; + constexpr size_t queue_size = 1000000; + static ThreadPool writer(pool_size, pool_size, queue_size); + return writer; +} + + +std::string IObjectStorage::getCacheBasePath() const +{ + return cache ? cache->getBasePath() : ""; +} + +void IObjectStorage::removeFromCache(const std::string & path) +{ + if (cache) + { + auto key = cache->hash(path); + cache->remove(key); + } +} + +} diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h new file mode 100644 index 00000000000..f2cc9b90294 --- /dev/null +++ b/src/Disks/IObjectStorage.h @@ -0,0 +1,122 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +class ReadBufferFromFileBase; +class WriteBufferFromFileBase; + +using ObjectAttributes = std::map; + +/// Path to blob with its size +struct BlobPathWithSize +{ + std::string relative_path; + uint64_t bytes_size; + + BlobPathWithSize() = default; + BlobPathWithSize(const BlobPathWithSize & other) = default; + + BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_) + : relative_path(relative_path_) + , bytes_size(bytes_size_) + {} +}; + +/// List of blobs with their sizes +using BlobsPathToSize = std::vector; + +struct ObjectMetadata +{ + uint64_t size_bytes; + std::optional last_modified; + std::optional attributes; +}; + +using FinalizeCallback = std::function; + +class IObjectStorage +{ +public: + explicit IObjectStorage(FileCachePtr && cache_) + : cache(std::move(cache_)) + {} + + virtual bool exists(const std::string & path) const = 0; + + virtual void listPrefix(const 
std::string & path, BlobsPathToSize & children) const = 0; + + virtual ObjectMetadata getObjectMetadata(const std::string & path) const = 0; + + virtual std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const = 0; + + virtual std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const = 0; + + /// Open the file for write and return WriteBufferFromFileBase object. + virtual std::unique_ptr writeObject( /// NOLINT + const std::string & path, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) = 0; + + /// Remove file. Throws exception if file doesn't exist or it's a directory. + virtual void removeObject(const std::string & path) = 0; + + virtual void removeObjects(const std::vector & paths) = 0; + + /// Remove file if it exists. 
+ virtual void removeObjectIfExists(const std::string & path) = 0; + + virtual void removeObjectsIfExist(const std::vector & paths) = 0; + + virtual void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + + virtual ~IObjectStorage() = default; + + std::string getCacheBasePath() const; + + static AsynchronousReaderPtr getThreadPoolReader(); + + static ThreadPool & getThreadPoolWriter(); + + virtual void shutdown() = 0; + + virtual void startup() = 0; + + void removeFromCache(const std::string & path); + + virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + +protected: + FileCachePtr cache; +}; + +using ObjectStoragePtr = std::unique_ptr; + +} diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp deleted file mode 100644 index 5e3d7031350..00000000000 --- a/src/Disks/S3/DiskS3.cpp +++ /dev/null @@ -1,1055 +0,0 @@ -#include "DiskS3.h" - -#if USE_AWS_S3 -#include "Disks/DiskFactory.h" - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; - extern const int FILE_ALREADY_EXISTS; - extern const int UNKNOWN_FORMAT; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; -} - -template -void throwIfError(Aws::Utils::Outcome & response) -{ - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); - } -} - -template -void throwIfError(const Aws::Utils::Outcome & 
response) -{ - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw Exception(err.GetMessage(), static_cast(err.GetErrorType())); - } -} -template -void logIfError(Aws::Utils::Outcome & response, Fn auto && msg) -{ - try - { - throwIfError(response); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__, msg()); - } -} - -template -void logIfError(const Aws::Utils::Outcome & response, Fn auto && msg) -{ - try - { - throwIfError(response); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__, msg()); - } -} - -DiskS3::DiskS3( - String name_, - String bucket_, - String s3_root_path_, - String version_id_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - ContextPtr context_, - SettingsPtr settings_, - GetDiskSettings settings_getter_) - : IDiskRemote(name_, s3_root_path_, metadata_disk_, std::move(cache_), "DiskS3", settings_->thread_pool_size) - , bucket(std::move(bucket_)) - , version_id(std::move(version_id_)) - , current_settings(std::move(settings_)) - , settings_getter(settings_getter_) - , context(context_) -{ -} - -void DiskS3::removeFromRemoteFS(const std::vector & paths) -{ - auto settings = current_settings.get(); - - size_t chunk_size_limit = settings->objects_chunk_size_to_delete; - size_t current_position = 0; - while (current_position < paths.size()) - { - std::vector current_chunk; - String keys; - for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(paths[current_position]); - current_chunk.push_back(obj); - - if (!keys.empty()) - keys += ", "; - keys += paths[current_position]; - } - - LOG_TRACE(log, "Remove AWS keys {}", keys); - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(current_chunk); - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = settings->client->DeleteObjects(request); - logIfError(outcome, 
[&](){return "Can't remove AWS keys: " + keys;}); - } -} - -void DiskS3::moveFile(const String & from_path, const String & to_path) -{ - auto settings = current_settings.get(); - - moveFile(from_path, to_path, settings->send_metadata); -} - -void DiskS3::moveFile(const String & from_path, const String & to_path, bool send_metadata) -{ - if (exists(to_path)) - throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); - - if (send_metadata) - { - auto revision = ++revision_counter; - const ObjectMetadata object_metadata { - {"from_path", from_path}, - {"to_path", to_path} - }; - createFileOperationObject("rename", revision, object_metadata); - } - metadata_disk->moveFile(from_path, to_path); -} - -std::unique_ptr DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional, std::optional) const -{ - auto settings = current_settings.get(); - auto metadata = readMetadata(path); - - LOG_TEST(log, "Read from file by path: {}. Existing S3 objects: {}", - backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - - ReadSettings disk_read_settings{read_settings}; - if (cache) - { - if (IFileCache::isReadOnly()) - disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; - - disk_read_settings.remote_fs_cache = cache; - } - - auto s3_impl = std::make_unique( - settings->client, bucket, version_id, metadata.remote_fs_root_path, metadata.remote_fs_objects, - settings->s3_settings.max_single_read_retries, disk_read_settings); - - if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - auto reader = getThreadPoolReader(); - return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); - } - else - { - auto buf = std::make_unique(std::move(s3_impl)); - return std::make_unique(std::move(buf), settings->min_bytes_for_seek); - } -} - -std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & 
write_settings) -{ - auto settings = current_settings.get(); - - /// Path to store new S3 object. - auto blob_name = getRandomASCIIString(); - - std::optional object_metadata; - if (settings->send_metadata) - { - auto revision = ++revision_counter; - object_metadata = { - {"path", path} - }; - blob_name = "r" + revisionToString(revision) + "-file-" + blob_name; - } - - LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", - mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - - bool cache_on_write = cache - && fs::path(path).extension() != ".tmp" - && write_settings.enable_filesystem_cache_on_write_operations - && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; - - auto s3_buffer = std::make_unique( - settings->client, - bucket, - fs::path(remote_fs_root_path) / blob_name, - settings->s3_settings, - std::move(object_metadata), - buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), cache_on_write ? cache : nullptr); - - auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) - { - readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_name, count] (Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); - }; - - return std::make_unique(std::move(s3_buffer), std::move(create_metadata_callback), fs::path(remote_fs_root_path) / blob_name); -} - -void DiskS3::createHardLink(const String & src_path, const String & dst_path) -{ - auto settings = current_settings.get(); - createHardLink(src_path, dst_path, settings->send_metadata); -} - -void DiskS3::createHardLink(const String & src_path, const String & dst_path, bool send_metadata) -{ - /// We don't need to record hardlinks created to shadow folder. 
- if (send_metadata && !dst_path.starts_with("shadow/")) - { - auto revision = ++revision_counter; - const ObjectMetadata object_metadata { - {"src_path", src_path}, - {"dst_path", dst_path} - }; - createFileOperationObject("hardlink", revision, object_metadata); - } - - IDiskRemote::createHardLink(src_path, dst_path); -} - -void DiskS3::shutdown() -{ - auto settings = current_settings.get(); - /// This call stops any next retry attempts for ongoing S3 requests. - /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. - /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. - /// This should significantly speed up shutdown process if S3 is unhealthy. - settings->client->DisableRequestProcessing(); -} - -void DiskS3::createFileOperationObject(const String & operation_name, UInt64 revision, const DiskS3::ObjectMetadata & metadata) -{ - auto settings = current_settings.get(); - const String key = "operations/r" + revisionToString(revision) + "-" + operation_name; - WriteBufferFromS3 buffer( - settings->client, - bucket, - remote_fs_root_path + key, - settings->s3_settings, - metadata); - - buffer.write('0'); - buffer.finalize(); -} - -void DiskS3::startup() -{ - auto settings = current_settings.get(); - - /// Need to be enabled if it was disabled during shutdown() call. - settings->client->EnableRequestProcessing(); - - if (!settings->send_metadata) - return; - - LOG_INFO(log, "Starting up disk {}", name); - - restore(); - - if (readSchemaVersion(bucket, remote_fs_root_path) < RESTORABLE_SCHEMA_VERSION) - migrateToRestorableSchema(); - - findLastRevision(); - - LOG_INFO(log, "Disk {} started up", name); -} - -void DiskS3::findLastRevision() -{ - /// Construct revision number from high to low bits. 
- String revision; - revision.reserve(64); - for (int bit = 0; bit < 64; ++bit) - { - auto revision_prefix = revision + "1"; - - LOG_TRACE(log, "Check object exists with revision prefix {}", revision_prefix); - - /// Check file or operation with such revision prefix exists. - if (checkObjectExists(bucket, remote_fs_root_path + "r" + revision_prefix) - || checkObjectExists(bucket, remote_fs_root_path + "operations/r" + revision_prefix)) - revision += "1"; - else - revision += "0"; - } - revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); - LOG_INFO(log, "Found last revision number {} for disk {}", revision_counter, name); -} - -int DiskS3::readSchemaVersion(const String & source_bucket, const String & source_path) -{ - int version = 0; - if (!checkObjectExists(source_bucket, source_path + SCHEMA_VERSION_OBJECT)) - return version; - - auto settings = current_settings.get(); - ReadBufferFromS3 buffer( - settings->client, - source_bucket, - source_path + SCHEMA_VERSION_OBJECT, - version_id, - settings->s3_settings.max_single_read_retries, - context->getReadSettings()); - - readIntText(version, buffer); - - return version; -} - -void DiskS3::saveSchemaVersion(const int & version) -{ - auto settings = current_settings.get(); - - WriteBufferFromS3 buffer( - settings->client, - bucket, - remote_fs_root_path + SCHEMA_VERSION_OBJECT, - settings->s3_settings); - - writeIntText(version, buffer); - buffer.finalize(); -} - -void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & metadata) -{ - copyObjectImpl(bucket, key, bucket, key, std::nullopt, metadata); -} - -void DiskS3::migrateFileToRestorableSchema(const String & path) -{ - LOG_TRACE(log, "Migrate file {} to restorable schema", metadata_disk->getPath() + path); - - auto meta = readMetadata(path); - - for (const auto & [key, _] : meta.remote_fs_objects) - { - ObjectMetadata metadata { - {"path", path} - }; - updateObjectMetadata(remote_fs_root_path + key, metadata); - } -} - -void 
DiskS3::migrateToRestorableSchemaRecursive(const String & path, Futures & results) -{ - checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. - - LOG_TRACE(log, "Migrate directory {} to restorable schema", metadata_disk->getPath() + path); - - bool dir_contains_only_files = true; - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - if (isDirectory(it->path())) - { - dir_contains_only_files = false; - break; - } - - /// The whole directory can be migrated asynchronously. - if (dir_contains_only_files) - { - auto result = getExecutor().execute([this, path] - { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - migrateFileToRestorableSchema(it->path()); - }); - - results.push_back(std::move(result)); - } - else - { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - if (!isDirectory(it->path())) - { - auto source_path = it->path(); - auto result = getExecutor().execute([this, source_path] - { - migrateFileToRestorableSchema(source_path); - }); - - results.push_back(std::move(result)); - } - else - migrateToRestorableSchemaRecursive(it->path(), results); - } -} - -void DiskS3::migrateToRestorableSchema() -{ - try - { - LOG_INFO(log, "Start migration to restorable schema for disk {}", name); - - Futures results; - - for (const auto & root : data_roots) - if (exists(root)) - migrateToRestorableSchemaRecursive(root + '/', results); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - } - catch (const Exception &) - { - tryLogCurrentException(log, fmt::format("Failed to migrate to restorable schema for disk {}", name)); - - throw; - } -} - -bool DiskS3::checkObjectExists(const String & source_bucket, const String & prefix) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(source_bucket); - request.SetPrefix(prefix); - 
request.SetMaxKeys(1); - - auto outcome = settings->client->ListObjectsV2(request); - throwIfError(outcome); - - return !outcome.GetResult().GetContents().empty(); -} - -bool DiskS3::checkUniqueId(const String & id) const -{ - auto settings = current_settings.get(); - /// Check that we have right s3 and have access rights - /// Actually interprets id as s3 object name and checks if it exists - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(bucket); - request.SetPrefix(id); - - auto outcome = settings->client->ListObjectsV2(request); - throwIfError(outcome); - - Aws::Vector object_list = outcome.GetResult().GetContents(); - - for (const auto & object : object_list) - if (object.GetKey() == id) - return true; - return false; -} - -Aws::S3::Model::HeadObjectResult DiskS3::headObject(const String & source_bucket, const String & key) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(source_bucket); - request.SetKey(key); - - auto outcome = settings->client->HeadObject(request); - throwIfError(outcome); - - return outcome.GetResultWithOwnership(); -} - -void DiskS3::listObjects(const String & source_bucket, const String & source_path, std::function callback) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(source_bucket); - request.SetPrefix(source_path); - request.SetMaxKeys(settings->list_object_keys_size); - - Aws::S3::Model::ListObjectsV2Outcome outcome; - do - { - outcome = settings->client->ListObjectsV2(request); - throwIfError(outcome); - - bool should_continue = callback(outcome.GetResult()); - - if (!should_continue) - break; - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - } while (outcome.GetResult().GetIsTruncated()); -} - -void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head) 
const -{ - if (head && (head->GetContentLength() >= static_cast(5_GiB))) - copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head); - else - copyObjectImpl(src_bucket, src_key, dst_bucket, dst_key); -} - -void DiskS3::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head, - std::optional> metadata) const -{ - auto settings = current_settings.get(); - Aws::S3::Model::CopyObjectRequest request; - request.SetCopySource(src_bucket + "/" + src_key); - request.SetBucket(dst_bucket); - request.SetKey(dst_key); - if (metadata) - { - request.SetMetadata(*metadata); - request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); - } - - auto outcome = settings->client->CopyObject(request); - - if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge") - { // Can't come here with MinIO, MinIO allows single part upload for large objects. - copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); - return; - } - - throwIfError(outcome); -} - -void DiskS3::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head, - std::optional> metadata) const -{ - LOG_TRACE(log, "Multipart copy upload has created. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, Metadata: {}", - src_bucket, src_key, dst_bucket, dst_key, metadata ? 
"REPLACE" : "NOT_SET"); - - auto settings = current_settings.get(); - - if (!head) - head = headObject(src_bucket, src_key); - - size_t size = head->GetContentLength(); - - String multipart_upload_id; - - { - Aws::S3::Model::CreateMultipartUploadRequest request; - request.SetBucket(dst_bucket); - request.SetKey(dst_key); - if (metadata) - request.SetMetadata(*metadata); - - auto outcome = settings->client->CreateMultipartUpload(request); - - throwIfError(outcome); - - multipart_upload_id = outcome.GetResult().GetUploadId(); - } - - std::vector part_tags; - - size_t upload_part_size = settings->s3_settings.min_upload_part_size; - for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) - { - Aws::S3::Model::UploadPartCopyRequest part_request; - part_request.SetCopySource(src_bucket + "/" + src_key); - part_request.SetBucket(dst_bucket); - part_request.SetKey(dst_key); - part_request.SetUploadId(multipart_upload_id); - part_request.SetPartNumber(part_number); - part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1)); - - auto outcome = settings->client->UploadPartCopy(part_request); - if (!outcome.IsSuccess()) - { - Aws::S3::Model::AbortMultipartUploadRequest abort_request; - abort_request.SetBucket(dst_bucket); - abort_request.SetKey(dst_key); - abort_request.SetUploadId(multipart_upload_id); - settings->client->AbortMultipartUpload(abort_request); - // In error case we throw exception later with first error from UploadPartCopy - } - throwIfError(outcome); - - auto etag = outcome.GetResult().GetCopyPartResult().GetETag(); - part_tags.push_back(etag); - } - - { - Aws::S3::Model::CompleteMultipartUploadRequest req; - req.SetBucket(dst_bucket); - req.SetKey(dst_key); - req.SetUploadId(multipart_upload_id); - - Aws::S3::Model::CompletedMultipartUpload multipart_upload; - for (size_t i = 0; i < part_tags.size(); ++i) - { - Aws::S3::Model::CompletedPart part; - 
multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); - } - - req.SetMultipartUpload(multipart_upload); - - auto outcome = settings->client->CompleteMultipartUpload(req); - - throwIfError(outcome); - - LOG_TRACE(log, "Multipart copy upload has completed. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, " - "Upload_id: {}, Parts: {}", src_bucket, src_key, dst_bucket, dst_key, multipart_upload_id, part_tags.size()); - } -} - -struct DiskS3::RestoreInformation -{ - UInt64 revision = LATEST_REVISION; - String source_bucket; - String source_path; - bool detached = false; -}; - -void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_information) -{ - const ReadSettings read_settings; - auto buffer = metadata_disk->readFile(RESTORE_FILE_NAME, read_settings, 512); - buffer->next(); - - try - { - std::map properties; - - while (buffer->hasPendingData()) - { - String property; - readText(property, *buffer); - assertChar('\n', *buffer); - - auto pos = property.find('='); - if (pos == String::npos || pos == 0 || pos == property.length()) - throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); - - auto key = property.substr(0, pos); - auto value = property.substr(pos + 1); - - auto it = properties.find(key); - if (it != properties.end()) - throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - - properties[key] = value; - } - - for (const auto & [key, value] : properties) - { - ReadBufferFromString value_buffer (value); - - if (key == "revision") - readIntText(restore_information.revision, value_buffer); - else if (key == "source_bucket") - readText(restore_information.source_bucket, value_buffer); - else if (key == "source_path") - readText(restore_information.source_path, value_buffer); - else if (key == "detached") - readBoolTextWord(restore_information.detached, value_buffer); - else - throw 
Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - } - } - catch (const Exception &) - { - tryLogCurrentException(log, "Failed to read restore information"); - throw; - } -} - -void DiskS3::restore() -{ - if (!exists(RESTORE_FILE_NAME)) - return; - - try - { - RestoreInformation information; - information.source_bucket = bucket; - information.source_path = remote_fs_root_path; - - readRestoreInformation(information); - if (information.revision == 0) - information.revision = LATEST_REVISION; - if (!information.source_path.ends_with('/')) - information.source_path += '/'; - - if (information.source_bucket == bucket) - { - /// In this case we need to additionally cleanup S3 from objects with later revision. - /// Will be simply just restore to different path. - if (information.source_path == remote_fs_root_path && information.revision != LATEST_REVISION) - throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); - - /// This case complicates S3 cleanup in case of unsuccessful restore. - if (information.source_path != remote_fs_root_path && remote_fs_root_path.starts_with(information.source_path)) - throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); - } - - LOG_INFO(log, "Starting to restore disk {}. 
Revision: {}, Source bucket: {}, Source path: {}", - name, information.revision, information.source_bucket, information.source_path); - - if (readSchemaVersion(information.source_bucket, information.source_path) < RESTORABLE_SCHEMA_VERSION) - throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); - - LOG_INFO(log, "Removing old metadata..."); - - bool cleanup_s3 = information.source_bucket != bucket || information.source_path != remote_fs_root_path; - for (const auto & root : data_roots) - if (exists(root)) - removeSharedRecursive(root + '/', !cleanup_s3, {}); - - restoreFiles(information); - restoreFileOperations(information); - - metadata_disk->removeFile(RESTORE_FILE_NAME); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - - LOG_INFO(log, "Restore disk {} finished", name); - } - catch (const Exception &) - { - tryLogCurrentException(log, fmt::format("Failed to restore disk {}", name)); - - throw; - } -} - -void DiskS3::restoreFiles(const RestoreInformation & restore_information) -{ - LOG_INFO(log, "Starting restore files for disk {}", name); - - std::vector> results; - auto restore_files = [this, &restore_information, &results](auto list_result) - { - std::vector keys; - for (const auto & row : list_result.GetContents()) - { - const String & key = row.GetKey(); - - /// Skip file operations objects. They will be processed separately. - if (key.find("/operations/") != String::npos) - continue; - - const auto [revision, _] = extractRevisionAndOperationFromKey(key); - /// Filter early if it's possible to get revision from key. - if (revision > restore_information.revision) - continue; - - keys.push_back(key); - } - - if (!keys.empty()) - { - auto result = getExecutor().execute([this, &restore_information, keys]() - { - processRestoreFiles(restore_information.source_bucket, restore_information.source_path, keys); - }); - - results.push_back(std::move(result)); - } - - return true; - }; - - /// Execute. 
- listObjects(restore_information.source_bucket, restore_information.source_path, restore_files); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - LOG_INFO(log, "Files are restored for disk {}", name); -} - -void DiskS3::processRestoreFiles(const String & source_bucket, const String & source_path, Strings keys) -{ - for (const auto & key : keys) - { - auto head_result = headObject(source_bucket, key); - auto object_metadata = head_result.GetMetadata(); - - /// Restore file if object has 'path' in metadata. - auto path_entry = object_metadata.find("path"); - if (path_entry == object_metadata.end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - const auto & path = path_entry->second; - - createDirectories(directoryPath(path)); - auto relative_key = shrinkKey(source_path, key); - - /// Copy object if we restore to different bucket / path. - if (bucket != source_bucket || remote_fs_root_path != source_path) - copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key, head_result); - - auto updater = [relative_key, head_result] (Metadata & metadata) - { - metadata.addObject(relative_key, head_result.GetContentLength()); - return true; - }; - - createUpdateAndStoreMetadata(path, false, updater); - - LOG_TRACE(log, "Restored file {}", path); - } -} - -void DiskS3::restoreFileOperations(const RestoreInformation & restore_information) -{ - auto settings = current_settings.get(); - - LOG_INFO(log, "Starting restore file operations for disk {}", name); - - /// Enable recording file operations if we restore to different bucket / path. 
- bool send_metadata = bucket != restore_information.source_bucket || remote_fs_root_path != restore_information.source_path; - - std::set renames; - auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](auto list_result) - { - const String rename = "rename"; - const String hardlink = "hardlink"; - - for (const auto & row : list_result.GetContents()) - { - const String & key = row.GetKey(); - - const auto [revision, operation] = extractRevisionAndOperationFromKey(key); - if (revision == UNKNOWN_REVISION) - { - LOG_WARNING(log, "Skip key {} with unknown revision", key); - continue; - } - - /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). - /// We can stop processing if revision of the object is already more than required. - if (revision > restore_information.revision) - return false; - - /// Keep original revision if restore to different bucket / path. - if (send_metadata) - revision_counter = revision - 1; - - auto object_metadata = headObject(restore_information.source_bucket, key).GetMetadata(); - if (operation == rename) - { - auto from_path = object_metadata["from_path"]; - auto to_path = object_metadata["to_path"]; - if (exists(from_path)) - { - moveFile(from_path, to_path, send_metadata); - LOG_TRACE(log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); - - if (restore_information.detached && isDirectory(to_path)) - { - /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. - if (!from_path.ends_with('/')) - from_path += '/'; - if (!to_path.ends_with('/')) - to_path += '/'; - - /// Always keep latest actual directory path to avoid 'detaching' not existing paths. 
- auto it = renames.find(from_path); - if (it != renames.end()) - renames.erase(it); - - renames.insert(to_path); - } - } - } - else if (operation == hardlink) - { - auto src_path = object_metadata["src_path"]; - auto dst_path = object_metadata["dst_path"]; - if (exists(src_path)) - { - createDirectories(directoryPath(dst_path)); - createHardLink(src_path, dst_path, send_metadata); - LOG_TRACE(log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); - } - } - } - - return true; - }; - - /// Execute. - listObjects(restore_information.source_bucket, restore_information.source_path + "operations/", restore_file_operations); - - if (restore_information.detached) - { - Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; - - for (const auto & path : renames) - { - /// Skip already detached parts. - if (path.find("/detached/") != std::string::npos) - continue; - - /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. 
- fs::path directory_path(path); - auto directory_name = directory_path.parent_path().filename().string(); - - auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; - if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) - continue; - - auto detached_path = pathToDetached(path); - - LOG_TRACE(log, "Move directory to 'detached' {} -> {}", path, detached_path); - - fs::path from_path = fs::path(path); - fs::path to_path = fs::path(detached_path); - if (path.ends_with('/')) - to_path /= from_path.parent_path().filename(); - else - to_path /= from_path.filename(); - - /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename - if (metadata_disk->exists(to_path)) - metadata_disk->removeRecursive(to_path); - - createDirectories(directoryPath(to_path)); - metadata_disk->moveDirectory(from_path, to_path); - } - } - - LOG_INFO(log, "File operations restored for disk {}", name); -} - -std::tuple DiskS3::extractRevisionAndOperationFromKey(const String & key) -{ - String revision_str; - String operation; - - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); - - return {(revision_str.empty() ? 
UNKNOWN_REVISION : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; -} - -String DiskS3::shrinkKey(const String & path, const String & key) -{ - if (!key.starts_with(path)) - throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); - - return key.substr(path.length()); -} - -String DiskS3::revisionToString(UInt64 revision) -{ - return std::bitset<64>(revision).to_string(); -} - -String DiskS3::pathToDetached(const String & source_path) -{ - if (source_path.ends_with('/')) - return fs::path(source_path).parent_path().parent_path() / "detached/"; - return fs::path(source_path).parent_path() / "detached/"; -} - -void DiskS3::onFreeze(const String & path) -{ - createDirectories(path); - auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); - writeIntText(revision_counter.load(), *revision_file_buf); - revision_file_buf->finalize(); -} - -void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) -{ - auto new_settings = settings_getter(config, "storage_configuration.disks." 
+ name, context_); - - current_settings.set(std::move(new_settings)); - - if (AsyncExecutor * exec = dynamic_cast(&getExecutor())) - exec->setMaxThreads(current_settings.get()->thread_pool_size); -} - -DiskS3Settings::DiskS3Settings( - const std::shared_ptr & client_, - const S3Settings::ReadWriteSettings & s3_settings_, - size_t min_bytes_for_seek_, - bool send_metadata_, - int thread_pool_size_, - int list_object_keys_size_, - int objects_chunk_size_to_delete_) - : client(client_) - , s3_settings(s3_settings_) - , min_bytes_for_seek(min_bytes_for_seek_) - , send_metadata(send_metadata_) - , thread_pool_size(thread_pool_size_) - , list_object_keys_size(list_object_keys_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) -{ -} - -} - -#endif diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h deleted file mode 100644 index a909d08f6fa..00000000000 --- a/src/Disks/S3/DiskS3.h +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once - -#include - -#if USE_AWS_S3 - -#include -#include -#include -#include "Disks/DiskFactory.h" -#include "Disks/Executor.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -/// Settings for DiskS3 that can be changed in runtime. -struct DiskS3Settings -{ - DiskS3Settings( - const std::shared_ptr & client_, - const S3Settings::ReadWriteSettings & s3_settings_, - size_t min_bytes_for_seek_, - bool send_metadata_, - int thread_pool_size_, - int list_object_keys_size_, - int objects_chunk_size_to_delete_); - - std::shared_ptr client; - S3Settings::ReadWriteSettings s3_settings; - size_t min_bytes_for_seek; - bool send_metadata; - int thread_pool_size; - int list_object_keys_size; - int objects_chunk_size_to_delete; -}; - - -/** - * Storage for persisting data in S3 and metadata on the local disk. - * Files are represented by file in local filesystem (clickhouse_root/disks/disk_name/path/to/file) - * that contains S3 object key with actual data. 
- */ -class DiskS3 final : public IDiskRemote -{ -public: - using ObjectMetadata = std::map; - using Futures = std::vector>; - - using SettingsPtr = std::unique_ptr; - using GetDiskSettings = std::function; - - struct RestoreInformation; - - DiskS3( - String name_, - String bucket_, - String s3_root_path_, - String version_id_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - ContextPtr context_, - SettingsPtr settings_, - GetDiskSettings settings_getter_); - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) override; - - void removeFromRemoteFS(const std::vector & paths) override; - - void moveFile(const String & from_path, const String & to_path, bool send_metadata); - void moveFile(const String & from_path, const String & to_path) override; - - void createHardLink(const String & src_path, const String & dst_path) override; - void createHardLink(const String & src_path, const String & dst_path, bool send_metadata); - - DiskType getType() const override { return DiskType::S3; } - bool isRemote() const override { return true; } - - bool supportZeroCopyReplication() const override { return true; } - - bool supportParallelWrite() const override { return true; } - - void shutdown() override; - - void startup() override; - - /// Check file exists and ClickHouse has an access to it - /// Overrode in remote disk - /// Required for remote disk to ensure that replica has access to data written by other node - bool checkUniqueId(const String & id) const override; - - /// Dumps current revision counter into file 'revision.txt' at given path. 
- void onFreeze(const String & path) override; - - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; - -private: - void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata); - /// Converts revision to binary string with leading zeroes (64 bit). - static String revisionToString(UInt64 revision); - - bool checkObjectExists(const String & source_bucket, const String & prefix) const; - void findLastRevision(); - - int readSchemaVersion(const String & source_bucket, const String & source_path); - void saveSchemaVersion(const int & version); - void updateObjectMetadata(const String & key, const ObjectMetadata & metadata); - void migrateFileToRestorableSchema(const String & path); - void migrateToRestorableSchemaRecursive(const String & path, Futures & results); - void migrateToRestorableSchema(); - - Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key) const; - void listObjects(const String & source_bucket, const String & source_path, std::function callback) const; - void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head = std::nullopt) const; - - void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head = std::nullopt, - std::optional> metadata = std::nullopt) const; - void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, - std::optional head = std::nullopt, - std::optional> metadata = std::nullopt) const; - - /// Restore S3 metadata files on file system. 
- void restore(); - void readRestoreInformation(RestoreInformation & restore_information); - void restoreFiles(const RestoreInformation & restore_information); - void processRestoreFiles(const String & source_bucket, const String & source_path, std::vector keys); - void restoreFileOperations(const RestoreInformation & restore_information); - - /// Remove 'path' prefix from 'key' to get relative key. - /// It's needed to store keys to metadata files in RELATIVE_PATHS version. - static String shrinkKey(const String & path, const String & key); - std::tuple extractRevisionAndOperationFromKey(const String & key); - - /// Forms detached path '../../detached/part_name/' from '../../part_name/' - static String pathToDetached(const String & source_path); - - const String bucket; - - const String version_id; - - MultiVersion current_settings; - /// Gets disk settings from context. - GetDiskSettings settings_getter; - - std::atomic revision_counter = 0; - static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); - static constexpr UInt64 UNKNOWN_REVISION = 0; - - /// File at path {metadata_path}/restore contains metadata restore information - inline static const String RESTORE_FILE_NAME = "restore"; - - /// Key has format: ../../r{revision}-{operation} - const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; - - /// Object contains information about schema version. - inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; - /// Version with possibility to backup-restore metadata. - static constexpr int RESTORABLE_SCHEMA_VERSION = 1; - /// Directories with data. 
- const std::vector data_roots {"data", "store"}; - - ContextPtr context; -}; - -} - -#endif diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp new file mode 100644 index 00000000000..c4cd3253a21 --- /dev/null +++ b/src/Disks/S3/diskSettings.cpp @@ -0,0 +1,127 @@ +#include + +#include + +namespace DB +{ + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + S3Settings::ReadWriteSettings rw_settings; + rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries); + rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size); + rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor); + rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold); + rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size); + + return std::make_unique( + rw_settings, + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".thread_pool_size", 16), + config.getInt(config_prefix + ".list_object_keys_size", 1000), + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); +} + +std::shared_ptr getProxyResolverConfiguration( + const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config) +{ + auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint")); + auto proxy_scheme = 
proxy_resolver_config.getString(prefix + ".proxy_scheme"); + if (proxy_scheme != "http" && proxy_scheme != "https") + throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); + auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); + auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10); + + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", + endpoint.toString(), proxy_scheme, proxy_port); + + return std::make_shared(endpoint, proxy_scheme, proxy_port, cache_ttl); +} + +std::shared_ptr getProxyListConfiguration( + const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config) +{ + std::vector keys; + proxy_config.keys(prefix, keys); + + std::vector proxies; + for (const auto & key : keys) + if (startsWith(key, "uri")) + { + Poco::URI proxy_uri(proxy_config.getString(prefix + "." + key)); + + if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https") + throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); + if (proxy_uri.getHost().empty()) + throw Exception("Empty host in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); + + proxies.push_back(proxy_uri); + + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString()); + } + + if (!proxies.empty()) + return std::make_shared(proxies); + + return nullptr; +} + +std::shared_ptr getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config) +{ + if (!config.has(prefix + ".proxy")) + return nullptr; + + std::vector config_keys; + config.keys(prefix + ".proxy", config_keys); + + if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver")) + { + if (resolver_configs > 1) + throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS); + + 
return getProxyResolverConfiguration(prefix + ".proxy.resolver", config); + } + + return getProxyListConfiguration(prefix + ".proxy", config); +} + + +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( + config.getString(config_prefix + ".region", ""), + context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects); + + S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); + if (uri.key.back() != '/') + throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); + + client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000); + client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000); + client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); + client_configuration.endpointOverride = uri.endpoint; + + auto proxy_config = getProxyConfiguration(config_prefix, config); + if (proxy_config) + { + client_configuration.perRequestConfiguration + = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); }; + client_configuration.error_report + = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + } + + client_configuration.retryStrategy + = std::make_shared(config.getUInt(config_prefix + ".retry_attempts", 10)); + + return S3::ClientFactory::instance().create( + client_configuration, + uri.is_virtual_hosted_style, + config.getString(config_prefix + ".access_key_id", ""), + config.getString(config_prefix + ".secret_access_key", ""), + config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), + {}, + config.getBool(config_prefix + ".use_environment_credentials", 
config.getBool("s3.use_environment_credentials", false)), + config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false))); +} + +} diff --git a/src/Disks/S3/diskSettings.h b/src/Disks/S3/diskSettings.h new file mode 100644 index 00000000000..d8784d0b5a5 --- /dev/null +++ b/src/Disks/S3/diskSettings.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +#endif diff --git a/src/Disks/S3/parseConfig.h b/src/Disks/S3/parseConfig.h new file mode 100644 index 00000000000..2d14ce9468b --- /dev/null +++ b/src/Disks/S3/parseConfig.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#if USE_AWS_S3 + +#include +#include +#include +#include "Disks/DiskCacheWrapper.h" +#include "Storages/StorageS3Settings.h" +#include "ProxyConfiguration.h" +#include "ProxyListConfiguration.h" +#include "ProxyResolverConfiguration.h" +#include "Disks/DiskRestartProxy.h" +#include "Disks/DiskLocal.h" +#include "Disks/RemoteDisksCommon.h" +#include + + +namespace DB +{ + + +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +std::shared_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + + +} diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index b145f805a23..6a052dfab02 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -10,19 +10,21 @@ #include #include -#include "DiskS3.h" -#include "Disks/DiskCacheWrapper.h" -#include 
"Storages/StorageS3Settings.h" -#include "ProxyConfiguration.h" -#include "ProxyListConfiguration.h" -#include "ProxyResolverConfiguration.h" -#include "Disks/DiskRestartProxy.h" -#include "Disks/DiskLocal.h" -#include "Disks/RemoteDisksCommon.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -31,6 +33,7 @@ namespace ErrorCodes namespace { + void checkWriteAccess(IDisk & disk) { auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); @@ -48,130 +51,8 @@ void checkReadAccess(const String & disk_name, IDisk & disk) void checkRemoveAccess(IDisk & disk) { disk.removeFile("test_acl"); } -std::shared_ptr getProxyResolverConfiguration( - const String & prefix, const Poco::Util::AbstractConfiguration & proxy_resolver_config) -{ - auto endpoint = Poco::URI(proxy_resolver_config.getString(prefix + ".endpoint")); - auto proxy_scheme = proxy_resolver_config.getString(prefix + ".proxy_scheme"); - if (proxy_scheme != "http" && proxy_scheme != "https") - throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); - auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); - auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10); - - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", - endpoint.toString(), proxy_scheme, proxy_port); - - return std::make_shared(endpoint, proxy_scheme, proxy_port, cache_ttl); } -std::shared_ptr getProxyListConfiguration( - const String & prefix, const Poco::Util::AbstractConfiguration & proxy_config) -{ - std::vector keys; - proxy_config.keys(prefix, keys); - - std::vector proxies; - for (const auto & key : keys) - if (startsWith(key, "uri")) - { - Poco::URI proxy_uri(proxy_config.getString(prefix + "." 
+ key)); - - if (proxy_uri.getScheme() != "http" && proxy_uri.getScheme() != "https") - throw Exception("Only HTTP/HTTPS schemas allowed in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); - if (proxy_uri.getHost().empty()) - throw Exception("Empty host in proxy uri: " + proxy_uri.toString(), ErrorCodes::BAD_ARGUMENTS); - - proxies.push_back(proxy_uri); - - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy: {}", proxy_uri.toString()); - } - - if (!proxies.empty()) - return std::make_shared(proxies); - - return nullptr; -} - -std::shared_ptr getProxyConfiguration(const String & prefix, const Poco::Util::AbstractConfiguration & config) -{ - if (!config.has(prefix + ".proxy")) - return nullptr; - - std::vector config_keys; - config.keys(prefix + ".proxy", config_keys); - - if (auto resolver_configs = std::count(config_keys.begin(), config_keys.end(), "resolver")) - { - if (resolver_configs > 1) - throw Exception("Multiple proxy resolver configurations aren't allowed", ErrorCodes::BAD_ARGUMENTS); - - return getProxyResolverConfiguration(prefix + ".proxy.resolver", config); - } - - return getProxyListConfiguration(prefix + ".proxy", config); -} - -std::shared_ptr -getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - config.getString(config_prefix + ".region", ""), - context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects); - - S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); - if (uri.key.back() != '/') - throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); - - client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000); 
- client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); - client_configuration.endpointOverride = uri.endpoint; - - auto proxy_config = getProxyConfiguration(config_prefix, config); - if (proxy_config) - { - client_configuration.perRequestConfiguration - = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); }; - client_configuration.error_report - = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; - } - - client_configuration.retryStrategy - = std::make_shared(config.getUInt(config_prefix + ".retry_attempts", 10)); - - return S3::ClientFactory::instance().create( - client_configuration, - uri.is_virtual_hosted_style, - config.getString(config_prefix + ".access_key_id", ""), - config.getString(config_prefix + ".secret_access_key", ""), - config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), - {}, - config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)), - config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false))); -} - -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - S3Settings::ReadWriteSettings rw_settings; - rw_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries); - rw_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size); - rw_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor); - rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + 
".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold); - rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size); - - return std::make_unique( - getClient(config, config_prefix, context), - rw_settings, - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getBool(config_prefix + ".send_metadata", false), - config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".list_object_keys_size", 1000), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); -} - -} - - void registerDiskS3(DiskFactory & factory) { auto creator = [](const String & name, @@ -191,16 +72,21 @@ void registerDiskS3(DiskFactory & factory) FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - std::shared_ptr s3disk = std::make_shared( - name, - uri.bucket, - uri.key, - uri.version_id, - metadata_disk, - std::move(cache), - context, + bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); + + ObjectStoragePtr s3_storage = std::make_unique( + std::move(cache), getClient(config, config_prefix, context), getSettings(config, config_prefix, context), - getSettings); + uri.version_id, uri.bucket); + + std::shared_ptr s3disk = std::make_shared( + name, + uri.key, + "DiskS3", + metadata_disk, + std::move(s3_storage), + DiskType::S3, + send_metadata); /// This code is used only to check access to the corresponding disk. 
if (!config.getBool(config_prefix + ".skip_access_check", false)) diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp new file mode 100644 index 00000000000..c81e5549c92 --- /dev/null +++ b/src/Disks/S3ObjectStorage.cpp @@ -0,0 +1,436 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int S3_ERROR; + extern const int FILE_ALREADY_EXISTS; + extern const int UNKNOWN_FORMAT; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +namespace +{ + +template +void throwIfError(Aws::Utils::Outcome & response) +{ + if (!response.IsSuccess()) + { + const auto & err = response.GetError(); + throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); + } +} + +template +void throwIfError(const Aws::Utils::Outcome & response) +{ + if (!response.IsSuccess()) + { + const auto & err = response.GetError(); + throw Exception(err.GetMessage(), static_cast(err.GetErrorType())); + } +} + +} + +Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const +{ + auto client_ptr = client.get(); + Aws::S3::Model::HeadObjectRequest request; + request.SetBucket(bucket_from); + request.SetKey(key); + + return client_ptr->HeadObject(request); +} + +bool S3ObjectStorage::exists(const std::string & path) const +{ + auto object_head = requestObjectHeadData(bucket, path); + if (!object_head.IsSuccess()) + { + if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + return false; + + throwIfError(object_head); + } + return true; +} + + +std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & 
blobs_to_read, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + + ReadSettings disk_read_settings{read_settings}; + if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + + disk_read_settings.remote_fs_cache = cache; + } + + auto settings_ptr = s3_settings.get(); + + auto s3_impl = std::make_unique( + client.get(), bucket, version_id, common_path_prefix, blobs_to_read, + settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); + + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + { + auto reader = getThreadPoolReader(); + return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); + } + else + { + auto buf = std::make_unique(std::move(s3_impl)); + return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); + } +} + +std::unique_ptr S3ObjectStorage::readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto settings_ptr = s3_settings.get(); + return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); +} + + +std::unique_ptr S3ObjectStorage::writeObject( + const std::string & path, + std::optional attributes, + FinalizeCallback && finalize_callback, + size_t buf_size, + const WriteSettings & write_settings) +{ + bool cache_on_write = cache + && fs::path(path).extension() != ".tmp" + && write_settings.enable_filesystem_cache_on_write_operations + && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; + + auto settings_ptr = s3_settings.get(); + auto s3_buffer = std::make_unique( + client.get(), + bucket, + path, + settings_ptr->s3_settings, + attributes, + buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), + cache_on_write ? 
cache : nullptr); + + return std::make_unique(std::move(s3_buffer), std::move(finalize_callback), path); +} + +void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +{ + auto settings_ptr = s3_settings.get(); + auto client_ptr = client.get(); + + Aws::S3::Model::ListObjectsV2Request request; + request.SetBucket(bucket); + request.SetPrefix(path); + request.SetMaxKeys(settings_ptr->list_object_keys_size); + + Aws::S3::Model::ListObjectsV2Outcome outcome; + do + { + outcome = client_ptr->ListObjectsV2(request); + throwIfError(outcome); + + auto result = outcome.GetResult(); + auto objects = result.GetContents(); + for (const auto & object : objects) + children.emplace_back(object.GetKey(), object.GetSize()); + + if (objects.empty()) + break; + + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + } while (outcome.GetResult().GetIsTruncated()); +} + +void S3ObjectStorage::removeObject(const std::string & path) +{ + auto client_ptr = client.get(); + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects({obj}); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + + throwIfError(outcome); +} + +void S3ObjectStorage::removeObjects(const std::vector & paths) +{ + if (paths.empty()) + return; + + auto client_ptr = client.get(); + std::vector keys; + keys.reserve(paths.size()); + + for (const auto & path : paths) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + keys.push_back(obj); + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(keys); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + + throwIfError(outcome); + +} + +void S3ObjectStorage::removeObjectIfExists(const std::string & path) +{ + auto 
client_ptr = client.get(); + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects({obj}); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + client_ptr->DeleteObjects(request); +} + +void S3ObjectStorage::removeObjectsIfExist(const std::vector & paths) +{ + if (paths.empty()) + return; + + auto client_ptr = client.get(); + + std::vector keys; + keys.reserve(paths.size()); + for (const auto & path : paths) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(path); + keys.push_back(obj); + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(keys); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + + throwIfError(outcome); +} + +ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const +{ + ObjectMetadata result; + + auto object_head = requestObjectHeadData(bucket, path); + throwIfError(object_head); + + auto & object_head_result = object_head.GetResult(); + result.size_bytes = object_head_result.GetContentLength(); + result.last_modified = object_head_result.GetLastModified().Millis(); + result.attributes = object_head_result.GetMetadata(); + + return result; +} + +void S3ObjectStorage::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head, + std::optional metadata) const +{ + auto client_ptr = client.get(); + Aws::S3::Model::CopyObjectRequest request; + request.SetCopySource(src_bucket + "/" + src_key); + request.SetBucket(dst_bucket); + request.SetKey(dst_key); + if (metadata) + { + request.SetMetadata(*metadata); + request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); + } + + auto outcome = client_ptr->CopyObject(request); + + if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == 
"EntityTooLarge") + { // Can't come here with MinIO, MinIO allows single part upload for large objects. + copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); + return; + } + + throwIfError(outcome); +} + +void S3ObjectStorage::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head, + std::optional metadata) const +{ + if (!head) + head = requestObjectHeadData(src_bucket, src_key).GetResult(); + + auto settings_ptr = s3_settings.get(); + auto client_ptr = client.get(); + size_t size = head->GetContentLength(); + + String multipart_upload_id; + + { + Aws::S3::Model::CreateMultipartUploadRequest request; + request.SetBucket(dst_bucket); + request.SetKey(dst_key); + if (metadata) + request.SetMetadata(*metadata); + + auto outcome = client_ptr->CreateMultipartUpload(request); + + throwIfError(outcome); + + multipart_upload_id = outcome.GetResult().GetUploadId(); + } + + std::vector part_tags; + + size_t upload_part_size = settings_ptr->s3_settings.min_upload_part_size; + for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) + { + Aws::S3::Model::UploadPartCopyRequest part_request; + part_request.SetCopySource(src_bucket + "/" + src_key); + part_request.SetBucket(dst_bucket); + part_request.SetKey(dst_key); + part_request.SetUploadId(multipart_upload_id); + part_request.SetPartNumber(part_number); + part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1)); + + auto outcome = client_ptr->UploadPartCopy(part_request); + if (!outcome.IsSuccess()) + { + Aws::S3::Model::AbortMultipartUploadRequest abort_request; + abort_request.SetBucket(dst_bucket); + abort_request.SetKey(dst_key); + abort_request.SetUploadId(multipart_upload_id); + client_ptr->AbortMultipartUpload(abort_request); + // In error case we throw exception later with first error 
from UploadPartCopy + } + throwIfError(outcome); + + auto etag = outcome.GetResult().GetCopyPartResult().GetETag(); + part_tags.push_back(etag); + } + + { + Aws::S3::Model::CompleteMultipartUploadRequest req; + req.SetBucket(dst_bucket); + req.SetKey(dst_key); + req.SetUploadId(multipart_upload_id); + + Aws::S3::Model::CompletedMultipartUpload multipart_upload; + for (size_t i = 0; i < part_tags.size(); ++i) + { + Aws::S3::Model::CompletedPart part; + multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); + } + + req.SetMultipartUpload(multipart_upload); + + auto outcome = client_ptr->CompleteMultipartUpload(req); + + throwIfError(outcome); + } +} + +void S3ObjectStorage::copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes) +{ + auto head = requestObjectHeadData(bucket, object_from).GetResult(); + if (head.GetContentLength() >= static_cast(5UL * 1024 * 1024 * 1024)) + copyObjectMultipartImpl(bucket, object_from, bucket, object_to, head, object_to_attributes); + else + copyObjectImpl(bucket, object_from, bucket, object_to, head, object_to_attributes); +} + +void S3ObjectStorage::setNewSettings(std::unique_ptr && s3_settings_) +{ + s3_settings.set(std::move(s3_settings_)); +} + +void S3ObjectStorage::setNewClient(std::unique_ptr && client_) +{ + client.set(std::move(client_)); +} + +void S3ObjectStorage::shutdown() +{ + auto client_ptr = client.get(); + /// This call stops any next retry attempts for ongoing S3 requests. + /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. + /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. + /// This should significantly speed up shutdown process if S3 is unhealthy. 
+ const_cast(*client_ptr).DisableRequestProcessing(); +} + +void S3ObjectStorage::startup() +{ + auto client_ptr = client.get(); + + /// Need to be enabled if it was disabled during shutdown() call. + const_cast(*client_ptr.get()).EnableRequestProcessing(); +} + +void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + s3_settings.set(getSettings(config, config_prefix, context)); + client.set(getClient(config, config_prefix, context)); +} + +} diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h new file mode 100644 index 00000000000..b0762d07535 --- /dev/null +++ b/src/Disks/S3ObjectStorage.h @@ -0,0 +1,130 @@ +#pragma once +#include + +#include + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct S3ObjectStorageSettings +{ + + S3ObjectStorageSettings() = default; + + S3ObjectStorageSettings( + const S3Settings::ReadWriteSettings & s3_settings_, + uint64_t min_bytes_for_seek_, + uint64_t thread_pool_size_, + int32_t list_object_keys_size_, + int32_t objects_chunk_size_to_delete_) + : s3_settings(s3_settings_) + , min_bytes_for_seek(min_bytes_for_seek_) + , thread_pool_size(thread_pool_size_) + , list_object_keys_size(list_object_keys_size_) + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + {} + + S3Settings::ReadWriteSettings s3_settings; + + uint64_t min_bytes_for_seek; + uint64_t thread_pool_size; + int32_t list_object_keys_size; + int32_t objects_chunk_size_to_delete; +}; + + +class S3ObjectStorage : public IObjectStorage +{ +public: + S3ObjectStorage( + FileCachePtr && cache_, + std::unique_ptr && client_, + std::unique_ptr && s3_settings_, + String version_id_, + String bucket_) + : IObjectStorage(std::move(cache_)) + , bucket(bucket_) + , client(std::move(client_)) + , s3_settings(std::move(s3_settings_)) + , version_id(std::move(version_id_)) + {} + + bool exists(const std::string & 
path) const override; + + std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + /// Open the file for write and return WriteBufferFromFileBase object. + std::unique_ptr writeObject( /// NOLINT + const std::string & path, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) override; + + void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. + void removeObject(const std::string & path) override; + + void removeObjects(const std::vector & paths) override; + + void removeObjectIfExists(const std::string & path) override; + + void removeObjectsIfExist(const std::vector & paths) override; + + ObjectMetadata getObjectMetadata(const std::string & path) const override; + + void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) override; + + void setNewSettings(std::unique_ptr && s3_settings_); + + void setNewClient(std::unique_ptr && client_); + + void shutdown() override; + + void startup() override; + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + +private: + + void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt, + std::optional metadata = std::nullopt) const; + + void 
copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt, + std::optional metadata = std::nullopt) const; + + Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const; + + std::string bucket; + + MultiVersion client; + MultiVersion s3_settings; + + const String version_id; +}; + +} + +#endif diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index c1b2ec7db0f..5b1d278f4c2 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -37,7 +37,7 @@ namespace ErrorCodes ReadBufferFromS3::ReadBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const String & version_id_, diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 5c8396cd6d8..c5f72c7414f 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -29,7 +29,7 @@ namespace DB class ReadBufferFromS3 : public SeekableReadBuffer, public WithFileName, public WithFileSize { private: - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; String bucket; String key; String version_id; @@ -48,7 +48,7 @@ private: public: ReadBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const String & version_id_, @@ -94,7 +94,7 @@ class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory, public { public: explicit ReadBufferS3Factory( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const String & version_id_, @@ -125,7 +125,7 @@ public: String getFileName() const override { return bucket + "/" + key; } private: - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; const String bucket; const String key; const String version_id; diff --git a/src/IO/S3Common.cpp 
b/src/IO/S3Common.cpp index c277332ef03..3732b662ea2 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -708,7 +708,7 @@ namespace S3 return ret; } - std::shared_ptr ClientFactory::create( // NOLINT + std::unique_ptr ClientFactory::create( // NOLINT const PocoHTTPClientConfiguration & cfg_, bool is_virtual_hosted_style, const String & access_key_id, @@ -746,7 +746,7 @@ namespace S3 use_environment_credentials, use_insecure_imds_request); - return std::make_shared( + return std::make_unique( std::move(auth_signer), std::move(client_configuration), // Client configuration. is_virtual_hosted_style || client_configuration.endpointOverride.empty() // Use virtual addressing only if endpoint is not specified. @@ -856,7 +856,7 @@ namespace S3 quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : ""); } - size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error) + size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error) { Aws::S3::Model::HeadObjectRequest req; req.SetBucket(bucket); diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 16134f173d5..01f77cff820 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -31,7 +31,7 @@ public: static ClientFactory & instance(); - std::shared_ptr create( + std::unique_ptr create( const PocoHTTPClientConfiguration & cfg, bool is_virtual_hosted_style, const String & access_key_id, @@ -76,7 +76,7 @@ struct URI static void validateBucket(const String & bucket, const Poco::URI & uri); }; -size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true); +size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true); } diff --git a/src/IO/WriteBufferFromS3.cpp 
b/src/IO/WriteBufferFromS3.cpp index 342a512ee52..8012ad95ec7 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -54,7 +54,7 @@ struct WriteBufferFromS3::PutObjectTask }; WriteBufferFromS3::WriteBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const S3Settings::ReadWriteSettings & s3_settings_, @@ -65,10 +65,10 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , object_metadata(std::move(object_metadata_)) , client_ptr(std::move(client_ptr_)) , upload_part_size(s3_settings_.min_upload_part_size) , s3_settings(s3_settings_) + , object_metadata(std::move(object_metadata_)) , schedule(std::move(schedule_)) , cache(cache_) { diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 6279e519be0..7dbaad72940 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -48,7 +48,7 @@ class WriteBufferFromS3 final : public BufferWithOwnMemory { public: WriteBufferFromS3( - std::shared_ptr client_ptr_, + std::shared_ptr client_ptr_, const String & bucket_, const String & key_, const S3Settings::ReadWriteSettings & s3_settings_, @@ -90,10 +90,11 @@ private: String bucket; String key; - std::optional> object_metadata; - std::shared_ptr client_ptr; + std::shared_ptr client_ptr; size_t upload_part_size = 0; S3Settings::ReadWriteSettings s3_settings; + std::optional> object_metadata; + /// Buffer to accumulate data. 
std::shared_ptr temporary_buffer; size_t last_part_size = 0; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 87be99aa246..691759892eb 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -184,7 +184,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (storage_settings_ptr->allow_remote_fs_zero_copy_replication) { - if (auto disk = reserved_space->getDisk(); disk->getType() == DB::DiskType::S3) + if (auto disk = reserved_space->getDisk(); disk->supportZeroCopyReplication()) { String dummy; if (!storage.findReplicaHavingCoveringPart(entry.new_part_name, true, dummy).empty()) diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index ff08ba0c062..f46bce1c87b 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -110,7 +110,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() if (storage_settings_ptr->allow_remote_fs_zero_copy_replication) { - if (auto disk = reserved_space->getDisk(); disk->getType() == DB::DiskType::S3) + if (auto disk = reserved_space->getDisk(); disk->supportZeroCopyReplication()) { String dummy; if (!storage.findReplicaHavingCoveringPart(entry.new_part_name, true, dummy).empty()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 0e7c83742a4..fbdb1dabd88 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1219,7 +1219,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto disks = storage.getDisks(); bool only_s3_storage = true; for (const auto & disk : disks) - if (disk->getType() != DB::DiskType::S3) + if (!disk->supportZeroCopyReplication()) only_s3_storage = 
false; if (!disks.empty() && only_s3_storage && storage.checkZeroCopyLockExists(entry.new_part_name, disks[0])) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 36080485aca..a21b5fd9efa 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7760,7 +7760,8 @@ String StorageReplicatedMergeTree::getSharedDataReplica( } -Strings StorageReplicatedMergeTree::getZeroCopyPartPath(const MergeTreeSettings & settings, DiskType disk_type, const String & table_uuid, +Strings StorageReplicatedMergeTree::getZeroCopyPartPath( + const MergeTreeSettings & settings, DiskType disk_type, const String & table_uuid, const String & part_name, const String & zookeeper_path_old) { Strings res; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6107c1a5117..2ab553ad450 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -84,7 +84,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl { public: - Impl(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) + Impl(const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) : client(client_), globbed_uri(globbed_uri_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) @@ -171,7 +171,7 @@ private: bool is_finished{false}; }; -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) +StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) : pimpl(std::make_shared(client_, globbed_uri_)) {} String StorageS3Source::DisclosedGlobIterator::next() @@ -260,7 +260,7 @@ StorageS3Source::StorageS3Source( UInt64 max_block_size_, UInt64 max_single_read_retries_, String compression_hint_, - const std::shared_ptr & client_, + const std::shared_ptr & client_, const String & bucket_, const String & version_id_, std::shared_ptr 
file_iterator_, @@ -397,7 +397,7 @@ Chunk StorageS3Source::generate() return {}; } -static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) +static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) { bool is_finished = false; Aws::S3::Model::ListObjectsV2Request request; @@ -548,7 +548,6 @@ private: const Block sample_block; ContextPtr context; const CompressionMethod compression_method; - const StorageS3::S3Configuration & s3_configuration; const String bucket; const String key; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index cac5b3c270f..3fde17682d0 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -35,7 +35,7 @@ public: class DisclosedGlobIterator { public: - DisclosedGlobIterator(Aws::S3::S3Client &, const S3::URI &); + DisclosedGlobIterator(const Aws::S3::S3Client &, const S3::URI &); String next(); private: class Impl; @@ -82,7 +82,7 @@ public: UInt64 max_block_size_, UInt64 max_single_read_retries_, String compression_hint_, - const std::shared_ptr & client_, + const std::shared_ptr & client_, const String & bucket, const String & version_id, std::shared_ptr file_iterator_, @@ -104,7 +104,7 @@ private: UInt64 max_block_size; UInt64 max_single_read_retries; String compression_hint; - std::shared_ptr client; + std::shared_ptr client; Block sample_block; std::optional format_settings; @@ -191,7 +191,7 @@ public: const S3::URI uri; const String access_key_id; const String secret_access_key; - std::shared_ptr client; + std::shared_ptr client; S3Settings::AuthSettings auth_settings; S3Settings::ReadWriteSettings rw_settings; }; diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index b7ef3ce3ef2..c7041c05403 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -214,8 +214,9 @@ def 
test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): # Wait for merges and old parts deletion for attempt in range(0, 10): parts_count = node.query( - "SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' FORMAT Values" + "SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' and active = 1 FORMAT Values" ) + if parts_count == "(1)": break @@ -228,7 +229,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): assert ( node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) - wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD) + wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45) @pytest.mark.parametrize("node_name", ["node"]) From 5115fa8c6fa325db5ff3cb3560981fdcd1ff449a Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 12 May 2022 09:32:21 +0800 Subject: [PATCH 108/615] update test case tests/integration/test_hive_query/test.py --- tests/integration/test_hive_query/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 374a86d51e8..8e663873f11 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -375,7 +375,7 @@ def test_cache_read_bytes(started_cluster): for i in range(10): result = node.query( """ - SELECT day, count(*) FROM default.demo_parquet_1 group by day order by day settings input_format_parquet_allow_missing_columns = true + SELECT * FROM default.demo_parquet_1 settings input_format_parquet_allow_missing_columns = true """ ) node.query("system flush logs") From 98f9cd4a51047bc187838658229d125301249142 Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 12 May 2022 10:34:44 +0800 Subject: [PATCH 109/615] fix windowview populate --- src/Parsers/ASTCreateQuery.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 23881cd3fbb..011aa454095 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -400,9 +400,6 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (dictionary) dictionary->formatImpl(settings, state, frame); - if (is_populate) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); - if (is_watermark_strictly_ascending) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WATERMARK STRICTLY_ASCENDING" << (settings.hilite ? hilite_none : ""); @@ -423,6 +420,9 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat lateness_function->formatImpl(settings, state, frame); } + if (is_populate) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); + if (select) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" From 5211b12bdd96e72e45facbad07dc05da2f536197 Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 12 May 2022 13:13:36 +0800 Subject: [PATCH 110/615] fix windowview cleanup for database replicated --- src/Storages/WindowView/StorageWindowView.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 4d5502db895..975742cb771 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -936,7 +936,9 @@ void StorageWindowView::updateMaxWatermark(UInt32 watermark) inline void StorageWindowView::cleanup() { - InterpreterAlterQuery alter_query(getCleanupQuery(), getContext()); + auto cleanup_context = Context::createCopy(getContext()); + cleanup_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + InterpreterAlterQuery alter_query(getCleanupQuery(), cleanup_context); 
alter_query.execute(); std::lock_guard lock(fire_signal_mutex); From 4eb55d17b3772d4b2b8b11ea9101ed370a27362c Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Thu, 12 May 2022 02:19:05 +0900 Subject: [PATCH 111/615] consolidate hashid test queries and hold out from fasttest --- tests/queries/0_stateless/02293_hashid.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql index e6ee89e8d1a..773afc1e0de 100644 --- a/tests/queries/0_stateless/02293_hashid.sql +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -3,4 +3,3 @@ select number, hashid(number) from system.numbers limit 5; select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; select hashid(1234567890123456, 's3cr3t'); -select hashid(-1); From ea389eeacaea7d2511f9375dd6e2ffda823aa89c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 12 May 2022 12:44:32 +0200 Subject: [PATCH 112/615] Fix fast test build --- src/Disks/S3ObjectStorage.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index c81e5549c92..a941022a574 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -1,4 +1,8 @@ #include + + +#if USE_AWS_S3 + #include #include #include @@ -434,3 +438,5 @@ void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & } } + +#endif From 9125b95aa28237f081316a49b86fc01997b1a891 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 12 May 2022 13:28:55 +0200 Subject: [PATCH 113/615] Delete out --- tests/queries/0_stateless/out | 87 ----------------------------------- 1 file changed, 87 deletions(-) delete mode 100644 tests/queries/0_stateless/out diff --git a/tests/queries/0_stateless/out b/tests/queries/0_stateless/out deleted file mode 100644 index 9bb2afe7104..00000000000 --- a/tests/queries/0_stateless/out +++ /dev/null @@ 
-1,87 +0,0 @@ -Arrow -b'ARROW1\x00\x00\xff\xff\xff\xff\x08\x01\x00\x00\x10\x00\x00\x00\x00\x00\n\x00\x0c\x00\x06\x00\x05\x00\x08\x00\n\x00\x00\x00\x00\x01\x04\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x03\x00\x00\x00\xa8\x00\x00\x00l\x00\x00\x00\x04\x00\x00\x00t\xff\xff\xff\x00\x00\x00\x0c\x14\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00 \x00\x00\x00\x03\x00\x00\x00arr\x00\xa0\xff\xff\xff\x10\x00\x14\x00\x08\x00\x06\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x01\x02\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00item\x00\x00\x00\x00\x9e\xff\xff\xff@\x00\x00\x00\xd8\xff\xff\xff\x00\x00\x00\x04\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00s\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x10\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00id\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00@\x00\x00\x00\xff\xff\xff\xff(\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x04\x00\x18\x00\x00\x00(\x02\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x18\x00\x0c\x00\x04\x00\x08\x00\n\x00\x00\x00\xac\x00\x00\x00\x10\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00h\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\
x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00\x08\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x000123456789\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x06\x00\x00\x00\n\x00\x00\x00\x0f\x00\x00\x00\x15\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x
04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x04\x00<\x00\x00\x00(\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x18\x01\x00\x00\x00\x00\x00\x000\x01\x00\x00\x00\x00\x00\x00(\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x03\x00\x00\x00\xa8\x00\x00\x00l\x00\x00\x00\x04\x00\x00\x00t\xff\xff\xff\x00\x00\x00\x0c\x14\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00 \x00\x00\x00\x03\x00\x00\x00arr\x00\xa0\xff\xff\xff\x10\x00\x14\x00\x08\x00\x06\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x01\x02\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00item\x00\x00\x00\x00\x9e\xff\xff\xff@\x00\x00\x00\xd8\xff\xff\xff\x00\x00\x00\x04\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00s\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x10\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00id\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00@\x00\x00\x008\x01\x00\x00ARROW1' -ArrowStream 
-b'\xff\xff\xff\xff\x08\x01\x00\x00\x10\x00\x00\x00\x00\x00\n\x00\x0c\x00\x06\x00\x05\x00\x08\x00\n\x00\x00\x00\x00\x01\x04\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x03\x00\x00\x00\xa8\x00\x00\x00l\x00\x00\x00\x04\x00\x00\x00t\xff\xff\xff\x00\x00\x00\x0c\x14\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00 \x00\x00\x00\x03\x00\x00\x00arr\x00\xa0\xff\xff\xff\x10\x00\x14\x00\x08\x00\x06\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x01\x02\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00item\x00\x00\x00\x00\x9e\xff\xff\xff@\x00\x00\x00\xd8\xff\xff\xff\x00\x00\x00\x04\x10\x00\x00\x00\x18\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00s\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x10\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00id\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00@\x00\x00\x00\xff\xff\xff\xff(\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x04\x00\x18\x00\x00\x00(\x02\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x18\x00\x0c\x00\x04\x00\x08\x00\n\x00\x00\x00\xac\x00\x00\x00\x10\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00h\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x
00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00\x08\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x000123456789\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x06\x00\x00\x00\n\x00\x00\x00\x0f\x00\x00\x00\x15\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\
x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00' -Avro -b'Obj\x01\x04\x14avro.codec\x0csnappy\x16avro.schema\xb0\x02{"type":"record","name":"row","fields":[{"name":"id","type":"long"},{"name":"s","type":"string"},{"name":"arr","type":{"type":"array","items":"long"}}]}\x00VF=&\xb7\x91.u\xb7/\xe8{\xbe%\x91\xbb\x14\xbc\x01^\xc0\x00\x020\x00\x02\x021\x02\x00\x00\x04\x022\x04\x00\x02\x00\x06\x023\x06\x00\x02\x04\x00\x08\x024\x08\x00\x02\x04\x06\x00\n\x025\n\x00\x02\x04\x06\x08\x00\x0c\x026\x0c\x00\x01\n\x14\n\x00\x0e\x027\x0e\x01\x1e\x1c\x08\n\x0c\x00\x10\x028\x10\r\x0c<\x0e\x00\x12\x029\x12\x00\x02\x04\x06\x08\n\x0c\x0e\x10\x00x\xc9)kVF=&\xb7\x91.u\xb7/\xe8{\xbe%\x91\xbb' -CSV -b'0,"0","[]"\n1,"1","[0]"\n2,"2","[0,1]"\n3,"3","[0,1,2]"\n4,"4","[0,1,2,3]"\n5,"5","[0,1,2,3,4]"\n6,"6","[0,1,2,3,4,5]"\n7,"7","[0,1,2,3,4,5,6]"\n8,"8","[0,1,2,3,4,5,6,7]"\n9,"9","[0,1,2,3,4,5,6,7,8]"\n' -CSVWithNames -b'"id","s","arr"\n0,"0","[]"\n1,"1","[0]"\n2,"2","[0,1]"\n3,"3","[0,1,2]"\n4,"4","[0,1,2,3]"\n5,"5","[0,1,2,3,4]"\n6,"6","[0,1,2,3,4,5]"\n7,"7","[0,1,2,3,4,5,6]"\n8,"8","[0,1,2,3,4,5,6,7]"\n9,"9","[0,1,2,3,4,5,6,7,8]"\n' -CSVWithNamesAndTypes -b'"id","s","arr"\n"UInt64","String","Array(UInt64)"\n0,"0","[]"\n1,"1","[0]"\n2,"2","[0,1]"\n3,"3","[0,1,2]"\n4,"4","[0,1,2,3]"\n5,"5","[0,1,2,3,4]"\n6,"6","[0,1,2,3,4,5]"\n7,"7","[0,1,2,3,4,5,6]"\n8,"8","[0,1,2,3,4,5,6,7]"\n9,"9","[0,1,2,3,4,5,6,7,8]"\n' -CustomSeparated -b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -CustomSeparatedWithNames 
-b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -CustomSeparatedWithNamesAndTypes -b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -JSONColumns -b'{\n\t"id": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t"s": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t"arr": [[], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"], ["0","1","2","3","4","5"], ["0","1","2","3","4","5","6"], ["0","1","2","3","4","5","6","7"], ["0","1","2","3","4","5","6","7","8"]]\n}\n' -JSONCompactColumns -b'[\n\t["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],\n\t[[], ["0"], ["0","1"], ["0","1","2"], ["0","1","2","3"], ["0","1","2","3","4"], ["0","1","2","3","4","5"], ["0","1","2","3","4","5","6"], ["0","1","2","3","4","5","6","7"], ["0","1","2","3","4","5","6","7","8"]]\n]\n' -JSONCompactEachRow -b'["0", "0", []]\n["1", "1", ["0"]]\n["2", "2", ["0","1"]]\n["3", "3", ["0","1","2"]]\n["4", "4", ["0","1","2","3"]]\n["5", "5", ["0","1","2","3","4"]]\n["6", "6", ["0","1","2","3","4","5"]]\n["7", "7", ["0","1","2","3","4","5","6"]]\n["8", "8", ["0","1","2","3","4","5","6","7"]]\n["9", "9", ["0","1","2","3","4","5","6","7","8"]]\n' -JSONCompactEachRowWithNames -b'["id", "s", "arr"]\n["0", "0", []]\n["1", "1", ["0"]]\n["2", "2", ["0","1"]]\n["3", "3", ["0","1","2"]]\n["4", "4", ["0","1","2","3"]]\n["5", "5", ["0","1","2","3","4"]]\n["6", "6", ["0","1","2","3","4","5"]]\n["7", "7", ["0","1","2","3","4","5","6"]]\n["8", "8", ["0","1","2","3","4","5","6","7"]]\n["9", "9", ["0","1","2","3","4","5","6","7","8"]]\n' -JSONCompactEachRowWithNamesAndTypes -b'["id", "s", "arr"]\n["UInt64", 
"String", "Array(UInt64)"]\n["0", "0", []]\n["1", "1", ["0"]]\n["2", "2", ["0","1"]]\n["3", "3", ["0","1","2"]]\n["4", "4", ["0","1","2","3"]]\n["5", "5", ["0","1","2","3","4"]]\n["6", "6", ["0","1","2","3","4","5"]]\n["7", "7", ["0","1","2","3","4","5","6"]]\n["8", "8", ["0","1","2","3","4","5","6","7"]]\n["9", "9", ["0","1","2","3","4","5","6","7","8"]]\n' -JSONCompactStringsEachRow -b'["0", "0", "[]"]\n["1", "1", "[0]"]\n["2", "2", "[0,1]"]\n["3", "3", "[0,1,2]"]\n["4", "4", "[0,1,2,3]"]\n["5", "5", "[0,1,2,3,4]"]\n["6", "6", "[0,1,2,3,4,5]"]\n["7", "7", "[0,1,2,3,4,5,6]"]\n["8", "8", "[0,1,2,3,4,5,6,7]"]\n["9", "9", "[0,1,2,3,4,5,6,7,8]"]\n' -JSONCompactStringsEachRowWithNames -b'["id", "s", "arr"]\n["0", "0", "[]"]\n["1", "1", "[0]"]\n["2", "2", "[0,1]"]\n["3", "3", "[0,1,2]"]\n["4", "4", "[0,1,2,3]"]\n["5", "5", "[0,1,2,3,4]"]\n["6", "6", "[0,1,2,3,4,5]"]\n["7", "7", "[0,1,2,3,4,5,6]"]\n["8", "8", "[0,1,2,3,4,5,6,7]"]\n["9", "9", "[0,1,2,3,4,5,6,7,8]"]\n' -JSONCompactStringsEachRowWithNamesAndTypes -b'["id", "s", "arr"]\n["UInt64", "String", "Array(UInt64)"]\n["0", "0", "[]"]\n["1", "1", "[0]"]\n["2", "2", "[0,1]"]\n["3", "3", "[0,1,2]"]\n["4", "4", "[0,1,2,3]"]\n["5", "5", "[0,1,2,3,4]"]\n["6", "6", "[0,1,2,3,4,5]"]\n["7", "7", "[0,1,2,3,4,5,6]"]\n["8", "8", "[0,1,2,3,4,5,6,7]"]\n["9", "9", "[0,1,2,3,4,5,6,7,8]"]\n' -JSONEachRow -b'{"id":"0","s":"0","arr":[]}\n{"id":"1","s":"1","arr":["0"]}\n{"id":"2","s":"2","arr":["0","1"]}\n{"id":"3","s":"3","arr":["0","1","2"]}\n{"id":"4","s":"4","arr":["0","1","2","3"]}\n{"id":"5","s":"5","arr":["0","1","2","3","4"]}\n{"id":"6","s":"6","arr":["0","1","2","3","4","5"]}\n{"id":"7","s":"7","arr":["0","1","2","3","4","5","6"]}\n{"id":"8","s":"8","arr":["0","1","2","3","4","5","6","7"]}\n{"id":"9","s":"9","arr":["0","1","2","3","4","5","6","7","8"]}\n' -JSONLines 
-b'{"id":"0","s":"0","arr":[]}\n{"id":"1","s":"1","arr":["0"]}\n{"id":"2","s":"2","arr":["0","1"]}\n{"id":"3","s":"3","arr":["0","1","2"]}\n{"id":"4","s":"4","arr":["0","1","2","3"]}\n{"id":"5","s":"5","arr":["0","1","2","3","4"]}\n{"id":"6","s":"6","arr":["0","1","2","3","4","5"]}\n{"id":"7","s":"7","arr":["0","1","2","3","4","5","6"]}\n{"id":"8","s":"8","arr":["0","1","2","3","4","5","6","7"]}\n{"id":"9","s":"9","arr":["0","1","2","3","4","5","6","7","8"]}\n' -JSONStringsEachRow -b'{"id":"0","s":"0","arr":"[]"}\n{"id":"1","s":"1","arr":"[0]"}\n{"id":"2","s":"2","arr":"[0,1]"}\n{"id":"3","s":"3","arr":"[0,1,2]"}\n{"id":"4","s":"4","arr":"[0,1,2,3]"}\n{"id":"5","s":"5","arr":"[0,1,2,3,4]"}\n{"id":"6","s":"6","arr":"[0,1,2,3,4,5]"}\n{"id":"7","s":"7","arr":"[0,1,2,3,4,5,6]"}\n{"id":"8","s":"8","arr":"[0,1,2,3,4,5,6,7]"}\n{"id":"9","s":"9","arr":"[0,1,2,3,4,5,6,7,8]"}\n' -MsgPack -b'\x00\xc4\x010\x90\x01\xc4\x011\x91\x00\x02\xc4\x012\x92\x00\x01\x03\xc4\x013\x93\x00\x01\x02\x04\xc4\x014\x94\x00\x01\x02\x03\x05\xc4\x015\x95\x00\x01\x02\x03\x04\x06\xc4\x016\x96\x00\x01\x02\x03\x04\x05\x07\xc4\x017\x97\x00\x01\x02\x03\x04\x05\x06\x08\xc4\x018\x98\x00\x01\x02\x03\x04\x05\x06\x07\t\xc4\x019\x99\x00\x01\x02\x03\x04\x05\x06\x07\x08' -NDJSON -b'{"id":"0","s":"0","arr":[]}\n{"id":"1","s":"1","arr":["0"]}\n{"id":"2","s":"2","arr":["0","1"]}\n{"id":"3","s":"3","arr":["0","1","2"]}\n{"id":"4","s":"4","arr":["0","1","2","3"]}\n{"id":"5","s":"5","arr":["0","1","2","3","4"]}\n{"id":"6","s":"6","arr":["0","1","2","3","4","5"]}\n{"id":"7","s":"7","arr":["0","1","2","3","4","5","6"]}\n{"id":"8","s":"8","arr":["0","1","2","3","4","5","6","7"]}\n{"id":"9","s":"9","arr":["0","1","2","3","4","5","6","7","8"]}\n' -Native 
-b'\x03\n\x02id\x06UInt64\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x01s\x06String\x010\x011\x012\x013\x014\x015\x016\x017\x018\x019\x03arr\rArray(UInt64)\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00\x00\x00\x00\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x
00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' -ORC -b'ORC\n\x0b\n\x03\x00\x00\x00\x12\x04\x08\nP\x00\n\x15\n\x05\x00\x00\x00\x00\x00\x12\x0c\x08\n\x12\x06\x08\x00\x10\x12\x18ZP\x00\n\x12\n\x06\x00\x00\x00\x00\x00\x00\x12\x08\x08\nB\x02\x08\x14P\x00\n\r\n\x05\x00\x00\x00\x00\x00\x12\x04\x08\nP\x00\n\x16\n\x05\x00\x00\x00\x00\x00\x12\r\x08-\x12\x07\x08\x00\x10\x10\x18\xf0\x01P\x00\xfe\xff\xc0\xfe\xff\xc0\x07\x01\x00\xfe\xff\xc0\x07\x00\x010123456789\xfe\xff\xc0\x07\x01\x00\x02\xff\xff\xf8\xfd\x00\x00\x02\x00\x01\x00\x01\x01\x00\x02\x01\x00\x03\x01\x00\x04\x01\x00\x05\x01\x00\x06\x01\x00\n\x06\x08\x06\x10\x00\x18\r\n\x06\x08\x06\x10\x01\x18\x17\n\x06\x08\x06\x10\x02\x18\x14\n\x06\x08\x06\x10\x03\x18\x0f\n\x06\x08\x06\x10\x04\x18\x18\n\x06\x08\x00\x10\x00\x18\x03\n\x06\x08\x00\x10\x01\x18\x03\n\x06\x08\x01\x10\x01\x18\x03\n\x06\x08\x00\x10\x02\x18\x03\n\x06\x08\x02\x10\x02\x18\x03\n\x06\x08\x01\x10\x02\x18\n\n\x06\x08\x00\x10\x03\x18\x03\n\x06\x08\x02\x10\x03\x18\x03\n\x06\x08\x00\x10\x04\x18\x04\n\x06\x08\x01\x10\x04\x18\x19\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x12\x04\x08\x00\x10\x00\x1a\x03GMT\n3\n\x04\x08\nP\x00\n\x0c\x08\n\x12\x06\x08\x00\x10\x12\x18ZP\x00\n\x08\x08\nB\x02\x08\x14P\x00\n\x04\x08\nP\x00\n\r\x08-\x12\x07\x08\x00\x10\x10\x18\xf0\x01P\x00\x08\x03\x10\xb6\x02\x1a\x0b\x08\x03\x10_\x18< \x9b\x01(\n"\x19\x08\x0c\x12\x03\x01\x02\x03\x1a\x02id\x1a\x01s\x1a\x03arr \x00(\x000\x00"\x08\x08\x04 \x00(\x000\x00"\x08\x08\x08 \x00(\x000\x00"\x0b\x08\n\x12\x01\x04 \x00(\x000\x00"\x08\x08\x04 \x00(\x000\x000\n:\x04\x08\nP\x00:\x0c\x08\n\x12\x06\x08\x00\x10\x12\x18ZP\x00:\x08\x08\nB\x02\x08\x14P\x00:\x04\x08\nP\x00:\r\x08-\x12\x07\x08\x00\x10\x10\x18\xf0\x01P\x00@\x90NH\x01\x08\x92\x01\x10\x00\x18\x80\x80\x04"\x02\x00\x0b(50\x06\x82\xf4\x03\x03ORC\x18' -Parquet 
-b'PAR1\x15\x04\x15\xa0\x01\x15dL\x15\x14\x15\x04\x12\x00\x00P\x00\x00\r\x01\x00\x01\r\x08\x00\x02\r\x08\x00\x03\r\x08\x00\x04\r\x08\x00\x05\r\x08\x00\x06\r\x08\x00\x07\r\x08<\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x14\x15\x18,\x15\x14\x15\x04\x15\x06\x15\x06\x1c6\x00(\x08\t\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n$\x04\x05\x102Tv\x98\x00\x00\x00&\xf4\x01\x1c\x15\x04\x195\x04\x00\x06\x19\x18\x02id\x15\x02\x16\x14\x16\xa4\x02\x16\xec\x01&\x8a\x01&\x08\x1c6\x00(\x08\t\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x15\x04\x15d\x15\\L\x15\x14\x15\x04\x12\x00\x002\x10\x01\x00\x00\x000\x01\x05\x001\x01\x05\x002\x01\x05\x003\x01\x05\x004\x01\x05\x005\x01\x05<6\x01\x00\x00\x007\x01\x00\x00\x008\x01\x00\x00\x009\x15\x00\x15\x14\x15\x18,\x15\x14\x15\x04\x15\x06\x15\x06\x1c6\x00(\x019\x18\x010\x00\x00\x00\n$\x04\x05\x102Tv\x98\x00\x00\x00&\xcc\x04\x1c\x15\x0c\x195\x04\x00\x06\x19\x18\x01s\x15\x02\x16\x14\x16\xca\x01\x16\xc6\x01&\xfe\x03&\x86\x03\x1c6\x00(\x019\x18\x010\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x15\x04\x15\x90\x01\x15\\L\x15\x12\x15\x04\x12\x00\x00H\x00\x00\r\x01\x00\x01\r\x08\x00\x02\r\x08\x00\x03\r\x08\x00\x04\r\x08\x00\x05\r\x08\x00\x06\r\x08<\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\\\x15`,\x15\\\x15\x04\x15\x06\x15\x06\x1c6\x02(\x08\x08\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00.\xb4\x07\x00\x00\x00\rh\xf7\xbe\xdf\xdf?\x05\x00\x00\x00\x03\xa8\xaaL\x02\x04\r\x00\x01!\x102\x102\x04!C\x05!Ce\x102Tv\x102Tv\x08\x00&\xee\x07\x1c\x15\x04\x195\x04\x00\x06\x198\x03arr\x04list\x04item\x15\x02\x16\\\x16\xdc\x02\x16\xac\x02&\xbc\x06&\xc2\x05\x1c6\x02(\x08\x08\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x
15\x02\x19l5\x00\x18\x06schema\x15\x06\x00\x15\x04%\x00\x18\x02id%\x1cL\xac\x13@\x12\x00\x00\x00\x15\x0c%\x00\x18\x01s\x005\x00\x18\x03arr\x15\x02\x15\x06L<\x00\x00\x005\x04\x18\x04list\x15\x02\x00\x15\x04%\x02\x18\x04item%\x1cL\xac\x13@\x12\x00\x00\x00\x16\x14\x19\x1c\x19<&\xf4\x01\x1c\x15\x04\x195\x04\x00\x06\x19\x18\x02id\x15\x02\x16\x14\x16\xa4\x02\x16\xec\x01&\x8a\x01&\x08\x1c6\x00(\x08\t\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00&\xcc\x04\x1c\x15\x0c\x195\x04\x00\x06\x19\x18\x01s\x15\x02\x16\x14\x16\xca\x01\x16\xc6\x01&\xfe\x03&\x86\x03\x1c6\x00(\x019\x18\x010\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00&\xee\x07\x1c\x15\x04\x195\x04\x00\x06\x198\x03arr\x04list\x04item\x15\x02\x16\\\x16\xdc\x02\x16\xac\x02&\xbc\x06&\xc2\x05\x1c6\x02(\x08\x08\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19,\x15\x04\x15\x04\x15\x02\x00\x15\x00\x15\x04\x15\x02\x00\x00\x00\x16\xca\x06\x16\x14&\x08\x16\xde\x05\x14\x00\x00("parquet-cpp version 1.5.1-SNAPSHOT\x19<\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00v\x01\x00\x00PAR1' -RowBinary 
-b'\x00\x00\x00\x00\x00\x00\x00\x00\x010\x00\x01\x00\x00\x00\x00\x00\x00\x00\x011\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x012\x02\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x013\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x014\x04\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x015\x05\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x016\x06\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x017\x07\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x018\x08\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x019\t\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' -RowBinaryWithNames 
-b'\x03\x02id\x01s\x03arr\x00\x00\x00\x00\x00\x00\x00\x00\x010\x00\x01\x00\x00\x00\x00\x00\x00\x00\x011\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x012\x02\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x013\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x014\x04\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x015\x05\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x016\x06\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x017\x07\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x018\x08\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x019\t\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' -RowBinaryWithNamesAndTypes 
-b'\x03\x02id\x01s\x03arr\x06UInt64\x06String\rArray(UInt64)\x00\x00\x00\x00\x00\x00\x00\x00\x010\x00\x01\x00\x00\x00\x00\x00\x00\x00\x011\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x012\x02\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x013\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x014\x04\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x015\x05\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x016\x06\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x017\x07\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x018\x08\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x019\t\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00' -TSKV 
-b'id=0\ts=0\tarr=[]\nid=1\ts=1\tarr=[0]\nid=2\ts=2\tarr=[0,1]\nid=3\ts=3\tarr=[0,1,2]\nid=4\ts=4\tarr=[0,1,2,3]\nid=5\ts=5\tarr=[0,1,2,3,4]\nid=6\ts=6\tarr=[0,1,2,3,4,5]\nid=7\ts=7\tarr=[0,1,2,3,4,5,6]\nid=8\ts=8\tarr=[0,1,2,3,4,5,6,7]\nid=9\ts=9\tarr=[0,1,2,3,4,5,6,7,8]\n' -TSV -b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TSVRaw -b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TSVRawWithNames -b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TSVRawWithNamesAndTypes -b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TSVWithNames -b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TSVWithNamesAndTypes -b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TabSeparated -b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TabSeparatedRaw -b'0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' 
-TabSeparatedRawWithNames -b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TabSeparatedRawWithNamesAndTypes -b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TabSeparatedWithNames -b'id\ts\tarr\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -TabSeparatedWithNamesAndTypes -b'id\ts\tarr\nUInt64\tString\tArray(UInt64)\n0\t0\t[]\n1\t1\t[0]\n2\t2\t[0,1]\n3\t3\t[0,1,2]\n4\t4\t[0,1,2,3]\n5\t5\t[0,1,2,3,4]\n6\t6\t[0,1,2,3,4,5]\n7\t7\t[0,1,2,3,4,5,6]\n8\t8\t[0,1,2,3,4,5,6,7]\n9\t9\t[0,1,2,3,4,5,6,7,8]\n' -Values -b"(0,'0',[]),(1,'1',[0]),(2,'2',[0,1]),(3,'3',[0,1,2]),(4,'4',[0,1,2,3]),(5,'5',[0,1,2,3,4]),(6,'6',[0,1,2,3,4,5]),(7,'7',[0,1,2,3,4,5,6]),(8,'8',[0,1,2,3,4,5,6,7]),(9,'9',[0,1,2,3,4,5,6,7,8])" -LineAsString -b'0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n' -OK From 080a44f7a212f25e8b542d4bdf30b9d96c8dd331 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 12 May 2022 13:29:25 +0200 Subject: [PATCH 114/615] Update test --- tests/queries/0_stateless/02293_formats_json_columns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 291908f26df..20eba0449d8 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh From 0311dbb42295cb7f4fad712f51771419fabc16fc Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 12 May 2022 15:15:31 +0000 Subject: [PATCH 115/615] Add default implementation for Nothing, support arrays of nullable for arrayFilter and similar functions --- .../Serializations/SerializationNothing.h | 2 +- src/Functions/CastOverloadResolver.h | 1 + src/Functions/FunctionsConversion.h | 1 + src/Functions/FunctionsMiscellaneous.h | 5 +- src/Functions/IFunction.cpp | 46 +++++++++++++---- src/Functions/IFunction.h | 25 ++++++++-- src/Functions/IFunctionAdaptors.h | 2 + src/Functions/array/FunctionArrayMapped.h | 50 ++++++++++++++++--- src/Functions/array/array.cpp | 1 + src/Functions/assumeNotNull.cpp | 1 + src/Functions/castOrDefault.cpp | 2 + src/Functions/indexHint.cpp | 2 + src/Functions/isConstant.cpp | 2 + src/Functions/map.cpp | 1 + src/Functions/materialize.h | 5 ++ src/Functions/toColumnTypeName.cpp | 2 + src/Functions/toNullable.cpp | 1 + src/Functions/toTypeName.cpp | 2 + src/Functions/tuple.cpp | 1 + .../0_stateless/01882_total_rows_approx.sh | 2 +- ...4_nothing_arguments_in_functions.reference | 29 +++++++++++ .../02294_nothing_arguments_in_functions.sql | 37 ++++++++++++++ .../0_stateless/02295_type_nothing.sql | 28 +++++++++++ ...llable_arguments_in_array_filter.reference | 4 ++ ...296_nullable_arguments_in_array_filter.sql | 4 ++ 25 files changed, 232 insertions(+), 24 deletions(-) create mode 100644 tests/queries/0_stateless/02295_type_nothing.sql create mode 100644 tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.reference create mode 100644 tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.sql diff --git a/src/DataTypes/Serializations/SerializationNothing.h b/src/DataTypes/Serializations/SerializationNothing.h index 2de93a29763..e46a1e6ed30 100644 --- a/src/DataTypes/Serializations/SerializationNothing.h +++ b/src/DataTypes/Serializations/SerializationNothing.h @@ -16,7 +16,7 @@ class 
SerializationNothing : public SimpleTextSerialization private: [[noreturn]] static void throwNoSerialization() { - throw Exception("Serialization is not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Serialization is not implemented for type Nothing", ErrorCodes::NOT_IMPLEMENTED); } public: void serializeBinary(const Field &, WriteBuffer &) const override { throwNoSerialization(); } diff --git a/src/Functions/CastOverloadResolver.h b/src/Functions/CastOverloadResolver.h index cff17d810fe..9b579c34923 100644 --- a/src/Functions/CastOverloadResolver.h +++ b/src/Functions/CastOverloadResolver.h @@ -98,6 +98,7 @@ protected: } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } private: diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 1363e91eb0d..846c8915f26 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2509,6 +2509,7 @@ protected: } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } diff --git a/src/Functions/FunctionsMiscellaneous.h b/src/Functions/FunctionsMiscellaneous.h index 7e8cab842c8..ff27b0cc518 100644 --- a/src/Functions/FunctionsMiscellaneous.h +++ b/src/Functions/FunctionsMiscellaneous.h @@ -50,7 +50,8 @@ public: return expr_columns.getByName(signature->return_name).column; } -bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + 
bool useDefaultImplementationForNothing() const override { return false; } private: ExpressionActionsPtr expression_actions; @@ -118,6 +119,7 @@ public: String getName() const override { return "FunctionCapture"; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -247,6 +249,7 @@ public: String getName() const override { return name; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override { return return_type; } size_t getNumberOfArguments() const override { return capture->captured_types.size(); } diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 95dafcbb575..453c31302a0 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -203,6 +204,31 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls( return nullptr; } +ColumnPtr IExecutableFunction::defaultImplementationForNothing( + const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const +{ + if (!useDefaultImplementationForNothing()) + return nullptr; + + bool is_nothing_type_presented = false; + for (const auto & arg : args) + is_nothing_type_presented |= isNothing(arg.type); + + if (!is_nothing_type_presented) + return nullptr; + + if (!isNothing(result_type)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Function {} with argument with type Nothing and default 
implementation for Nothing " + "is expected to return result with type Nothing, got {}", + getName(), + result_type->getName()); + + return ColumnConst::create(ColumnNothing::create(1), input_rows_count); + +} + ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns( const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const { @@ -212,6 +238,9 @@ ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns( if (auto res = defaultImplementationForNulls(args, result_type, input_rows_count, dry_run)) return res; + if (auto res = defaultImplementationForNothing(args, result_type, input_rows_count)) + return res; + ColumnPtr res; if (dry_run) res = executeDryRunImpl(args, result_type, input_rows_count); @@ -275,11 +304,6 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const { - /// Result type Nothing means that we don't need to execute function at all. - /// Example: select arrayMap(x -> 2 * x, []); - if (isNothing(result_type)) - return result_type->createColumn(); - if (useDefaultImplementationForSparseColumns()) { size_t num_sparse_columns = 0; @@ -435,13 +459,13 @@ DataTypePtr IFunctionOverloadResolver::getReturnTypeWithoutLowCardinality(const } } - /// If one of the arguments is Nothing, then we won't really execute - /// the function and the result type should be also Nothing. 
- /// Example: select arrayMap(x -> 2 * x, []); - for (const auto & arg : arguments) + if (!arguments.empty() && useDefaultImplementationForNothing()) { - if (isNothing(arg.type)) - return std::make_shared(); + for (const auto & arg : arguments) + { + if (isNothing(arg.type)) + return std::make_shared(); + } } return getReturnTypeImpl(arguments); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 7b272fef53d..95af8a61aae 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -63,6 +63,11 @@ protected: */ virtual bool useDefaultImplementationForNulls() const { return true; } + /** Default implementation in presence of arguments with type Nothing is the following: + * If some of arguments have type Nothing then default implementation is to return constant column with type Nothing + */ + virtual bool useDefaultImplementationForNothing() const { return true; } + /** If the function have non-zero number of arguments, * and if all arguments are constant, that we could automatically provide default implementation: * arguments are converted to ordinary columns with single value, then function is executed as usual, @@ -100,6 +105,9 @@ private: ColumnPtr defaultImplementationForNulls( const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const; + ColumnPtr defaultImplementationForNothing( + const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const; + ColumnPtr executeWithoutLowCardinalityColumns( const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const; @@ -166,8 +174,8 @@ public: /** If function isSuitableForConstantFolding then, this method will be called during query analyzis * if some arguments are constants. For example logical functions (AndFunction, OrFunction) can * return they result based on some constant arguments. 
- * Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForConstants, - * useDefaultImplementationForLowCardinality are not applied. + * Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForNothing, + * useDefaultImplementationForConstants, useDefaultImplementationForLowCardinality are not applied. */ virtual ColumnPtr getConstantResultForNonConstArguments( const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const { return nullptr; } @@ -354,7 +362,13 @@ protected: */ virtual bool useDefaultImplementationForNulls() const { return true; } - /** If useDefaultImplementationForNulls() is true, then change arguments for getReturnType() and build(). + /** If useDefaultImplementationForNothing() is true, then change arguments for getReturnType() and build(): + * if some of arguments are Nothing then don't call getReturnType(), call build() with return_type = Nothing, + * Otherwise build returns build(arguments, getReturnType(arguments)); + */ + virtual bool useDefaultImplementationForNothing() const { return true; } + + /** If useDefaultImplementationForLowCardinalityColumns() is true, then change arguments for getReturnType() and build(). * If function arguments has low cardinality types, convert them to ordinary types. * getReturnType returns ColumnLowCardinality if at least one argument type is ColumnLowCardinality. 
*/ @@ -403,6 +417,11 @@ public: */ virtual bool useDefaultImplementationForNulls() const { return true; } + /** Default implementation in presence of arguments with type Nothing is the following: + * If some of arguments have type Nothing then default implementation is to return constant column with type Nothing + */ + virtual bool useDefaultImplementationForNothing() const { return true; } + /** If the function have non-zero number of arguments, * and if all arguments are constant, that we could automatically provide default implementation: * arguments are converted to ordinary columns with single value, then function is executed as usual, diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index ec43087ad66..dfb90fd3975 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -27,6 +27,7 @@ protected: } bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); } + bool useDefaultImplementationForNothing() const final { return function->useDefaultImplementationForNothing(); } bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); } bool useDefaultImplementationForLowCardinalityColumns() const final { return function->useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForSparseColumns() const final { return function->useDefaultImplementationForSparseColumns(); } @@ -124,6 +125,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { return function->getReturnTypeImpl(arguments); } bool useDefaultImplementationForNulls() const override { return function->useDefaultImplementationForNulls(); } + bool useDefaultImplementationForNothing() const override { return function->useDefaultImplementationForNothing(); } bool useDefaultImplementationForLowCardinalityColumns() const override { return 
function->useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForSparseColumns() const override { return function->useDefaultImplementationForSparseColumns(); } bool canBeExecutedOnLowCardinalityDictionary() const override { return function->canBeExecutedOnLowCardinalityDictionary(); } diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 58e6db86f75..f4f1d39d07f 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -16,11 +17,13 @@ #include #include #include +#include #include #include #include +#include #include @@ -156,7 +159,7 @@ public: DataTypePtr nested_type = data_type->getNestedType(); - if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8()) + if (Impl::needBoolean() && !isUInt8(nested_type)) throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found " + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -180,8 +183,14 @@ public: /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. DataTypePtr return_type = removeLowCardinality(data_type_function->getReturnType()); - if (Impl::needBoolean() && !WhichDataType(return_type).isUInt8()) - throw Exception("Expression for function " + getName() + " must return UInt8, found " + + /// Special cases when we need boolean lambda result: + /// - lambda may return Nullable(UInt8) column, in this case after lambda execution we will + /// replace all NULLs with 0 and return nested UInt8 column. + /// - lambda may return Nothing or Nullable(Nothing) because of default implementation of functions + /// for these types. In this case we will just create UInt8 const column full of 0. 
+ if (Impl::needBoolean() && !isUInt8(removeNullable(return_type)) && !isNothing(removeNullable(return_type))) + throw Exception("Expression for function " + getName() + " must return UInt8 or Nullable(UInt8), found " + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); static_assert( @@ -316,11 +325,38 @@ public: auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); replicated_column_function->appendArguments(arrays); - auto lambda_result = replicated_column_function->reduce().column; - if (lambda_result->lowCardinality()) - lambda_result = lambda_result->convertToFullColumnIfLowCardinality(); + auto lambda_result = replicated_column_function->reduce(); + if (lambda_result.column->lowCardinality()) + lambda_result.column = lambda_result.column->convertToFullColumnIfLowCardinality(); - return Impl::execute(*column_first_array, lambda_result); + if (Impl::needBoolean()) + { + toColumnTypeName + /// If result column is Nothing or Nullable(Nothing), just create const UInt8 column with 0 value. + if (isNothing(removeNullable(lambda_result.type))) + { + auto result_type = std::make_shared(); + lambda_result.column = result_type->createColumnConst(lambda_result.column->size(), 0); + } + /// If result column is Nullable(UInt8), then extract nested column and write 0 in all rows + /// when we have NULL. 
+ else if (lambda_result.column->isNullable()) + { + auto result_column = IColumn::mutate(std::move(lambda_result.column)); + auto * column_nullable = assert_cast(result_column.get()); + auto & null_map = column_nullable->getNullMapData(); + auto nested_column = IColumn::mutate(std::move(column_nullable->getNestedColumnPtr())); + auto & nested_data = assert_cast(nested_column.get())->getData(); + for (size_t i = 0; i != nested_data.size(); ++i) + { + if (null_map[i]) + nested_data[i] = 0; + } + lambda_result.column = std::move(nested_column); + } + } + + return Impl::execute(*column_first_array, lambda_result.column); } } }; diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 72380c0a2bf..4ef530e9c88 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -20,6 +20,7 @@ public: } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool isVariadic() const override { return true; } diff --git a/src/Functions/assumeNotNull.cpp b/src/Functions/assumeNotNull.cpp index b180931a83a..0fd1c08f855 100644 --- a/src/Functions/assumeNotNull.cpp +++ b/src/Functions/assumeNotNull.cpp @@ -33,6 +33,7 @@ public: size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index f7b93ec2e83..2e9f13545fd 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -52,6 +52,7 @@ public: ColumnNumbers 
getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } @@ -194,6 +195,7 @@ private: bool isVariadic() const override { return true; } bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForNothing() const override { return impl.useDefaultImplementationForNothing(); } bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns();} bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants();} bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override diff --git a/src/Functions/indexHint.cpp b/src/Functions/indexHint.cpp index bb38a56cf27..1f3dd23cc31 100644 --- a/src/Functions/indexHint.cpp +++ b/src/Functions/indexHint.cpp @@ -39,6 +39,8 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool isSuitableForConstantFolding() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/isConstant.cpp b/src/Functions/isConstant.cpp index 7ea3e26cb82..6d76cfc1dcc 100644 --- a/src/Functions/isConstant.cpp +++ b/src/Functions/isConstant.cpp @@ -27,6 +27,8 @@ public: bool useDefaultImplementationForNulls() const override { return false; } 
+ bool useDefaultImplementationForNothing() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } size_t getNumberOfArguments() const override diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 471d6fc575c..28c949b5dc3 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -65,6 +65,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override diff --git a/src/Functions/materialize.h b/src/Functions/materialize.h index aab4e5bdbdf..e71e463066e 100644 --- a/src/Functions/materialize.h +++ b/src/Functions/materialize.h @@ -23,6 +23,11 @@ public: return false; } + bool useDefaultImplementationForNothing() const override + { + return false; + } + /// Get the function name. 
String getName() const override { diff --git a/src/Functions/toColumnTypeName.cpp b/src/Functions/toColumnTypeName.cpp index 95c35243567..345de2954c4 100644 --- a/src/Functions/toColumnTypeName.cpp +++ b/src/Functions/toColumnTypeName.cpp @@ -26,6 +26,8 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override { settings.enable_lazy_execution_for_first_argument = true; diff --git a/src/Functions/toNullable.cpp b/src/Functions/toNullable.cpp index b7fe831f4ff..16d9f9198cd 100644 --- a/src/Functions/toNullable.cpp +++ b/src/Functions/toNullable.cpp @@ -28,6 +28,7 @@ public: size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/toTypeName.cpp b/src/Functions/toTypeName.cpp index 41567ac1ee3..f3af49315ed 100644 --- a/src/Functions/toTypeName.cpp +++ b/src/Functions/toTypeName.cpp @@ -30,6 +30,8 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override { settings.enable_lazy_execution_for_first_argument = false; diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index 6d5c53c0770..5a06ac21be4 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -52,6 +52,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) 
const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override diff --git a/tests/queries/0_stateless/01882_total_rows_approx.sh b/tests/queries/0_stateless/01882_total_rows_approx.sh index 26333f61692..fe0163919a7 100755 --- a/tests/queries/0_stateless/01882_total_rows_approx.sh +++ b/tests/queries/0_stateless/01882_total_rows_approx.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "create table data_01882 (key Int) Engine=MergeTree() part # thus check few times to be sure that this is not coincidence. for _ in {1..30}; do $CLICKHOUSE_CURL -vsS "${CLICKHOUSE_URL}&max_threads=1&default_format=Null&send_progress_in_http_headers=1&http_headers_progress_interval_ms=1" --data-binary @- <<< "select * from data_01882" |& { - grep -o -F '"total_rows_to_read":"10"' + grep -F '"total_rows_to_read"' } | { # grep out final result grep -v -F '"read_rows":"10"' diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference index 954015207ad..9360b9a1922 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference @@ -4,3 +4,32 @@ Array(Nothing) Array(Nothing) [] Array(Nothing) +Array(Nothing) +Array(Nothing) +[] +Array(Nothing) +Array(Nothing) +Array(Nothing) + +Nothing +Const(Nothing) +Nothing +Const(Nothing) +Nothing +Nothing +Array(Nothing) +Const(Array(Nothing)) +Array(Nothing) +Array(Nothing) +Map(UInt8, Nothing) +Const(Map(UInt8, Nothing)) +Map(UInt8, Nothing) +Map(UInt8, Nothing) +Tuple(UInt8, Nothing) +Const(Tuple(UInt8, Nothing)) +Tuple(UInt8, Nothing) +Tuple(UInt8, Nothing) +Nothing +Const(Nothing) +Nothing 
+Nothing diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql index 3df2577e465..732664e081f 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -4,3 +4,40 @@ select arrayMap((x, y) -> x + y, [], []); select toTypeName(arrayMap((x, y) -> x + y, [], [])); select arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)')); select toTypeName(arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)'))); + +select toTypeName(arrayMap(x -> 2 * x, [assumeNotNull(NULL)])); +select toColumnTypeName(arrayMap(x -> 2 * x, [assumeNotNull(NULL)])); + +select arrayFilter(x -> 2 * x < 0, []); +select toTypeName(arrayFilter(x -> 2 * x < 0, [])); +select toTypeName(arrayFilter(x -> 2 * x < 0, [assumeNotNull(NULL)])); +select toColumnTypeName(arrayFilter(x -> 2 * x < 0, [assumeNotNull(NULL)])); + +select CAST(assumeNotNull(NULL), 'String'); +select toTypeName(toInt32(assumeNotNull(NULL))); +select toColumnTypeName(toInt32(assumeNotNull(NULL))); + +select toTypeName(assumeNotNull(NULL)); +select toColumnTypeName(assumeNotNull(NULL)); +select toTypeName(assumeNotNull(materialize(NULL))); +select toColumnTypeName(assumeNotNull(materialize(NULL))); + +select toTypeName([assumeNotNull(NULL)]); +select toColumnTypeName([assumeNotNull(NULL)]); +select toTypeName([assumeNotNull(materialize(NULL))]); +select toColumnTypeName([assumeNotNull(materialize(NULL))]); + +select toTypeName(map(1, assumeNotNull(NULL))); +select toColumnTypeName(map(1, assumeNotNull(NULL))); +select toTypeName(map(1, assumeNotNull(materialize(NULL)))); +select toColumnTypeName(map(1, assumeNotNull(materialize(NULL)))); + +select toTypeName(tuple(1, assumeNotNull(NULL))); +select toColumnTypeName(tuple(1, assumeNotNull(NULL))); +select toTypeName(tuple(1, assumeNotNull(materialize(NULL)))); +select toColumnTypeName(tuple(1, 
assumeNotNull(materialize(NULL)))); + +select toTypeName(assumeNotNull(NULL) * 2); +select toColumnTypeName(assumeNotNull(NULL) * 2); +select toTypeName(assumeNotNull(materialize(NULL)) * 2); +select toColumnTypeName(assumeNotNull(materialize(NULL)) * 2); diff --git a/tests/queries/0_stateless/02295_type_nothing.sql b/tests/queries/0_stateless/02295_type_nothing.sql new file mode 100644 index 00000000000..4848a3bb751 --- /dev/null +++ b/tests/queries/0_stateless/02295_type_nothing.sql @@ -0,0 +1,28 @@ +select CAST(assumeNotNull(NULL), 'String') + +select toTypeName(assumeNotNull(NULL)); +select toColumnName(assumeNotNull(NULL)); +select toTypeName(assumeNotNull(materialize(NULL))); +select toColumnName(assumeNotNull(materialize(NULL))); + +select toTypeName([assumeNotNull(NULL)]); +select toColumnName([assumeNotNull(NULL)]); +select toTypeName([assumeNotNull(materialize(NULL))]); +select toColumnName([assumeNotNull(materialize(NULL))]); + +select toTypeName(map(1, assumeNotNull(NULL))); +select toColumnName(map(1, assumeNotNull(NULL))); +select toTypeName(map(1, assumeNotNull(materialize(NULL)))); +select toColumnName(map(1, assumeNotNull(materialize(NULL)))); + +select toTypeName(tuple(1, assumeNotNull(NULL))); +select toColumnName(tuple(1, assumeNotNull(NULL))); +select toTypeName(tuple(1, assumeNotNull(materialize(NULL)))); +select toColumnName(tuple(1, assumeNotNull(materialize(NULL)))); + +select toTypeName(assumeNotNull(NULL) * 2); +select toColumnName(assumeNotNull(NULL) * 2); +select toTypeName(assumeNotNull(materialize(NULL)) * 2); +select toColumnName(assumeNotNull(materialize(NULL)) * 2); + + diff --git a/tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.reference b/tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.reference new file mode 100644 index 00000000000..8c11de86262 --- /dev/null +++ b/tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.reference @@ -0,0 +1,4 @@ +[] +[] +[2,4] +[1,3] diff --git 
a/tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.sql b/tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.sql new file mode 100644 index 00000000000..3c1f2b41909 --- /dev/null +++ b/tests/queries/0_stateless/02296_nullable_arguments_in_array_filter.sql @@ -0,0 +1,4 @@ +select arrayFilter(x -> 2 * x > 0, []); +select arrayFilter(x -> 2 * x > 0, [NULL]); +select arrayFilter(x -> x % 2 ? NULL : 1, [1, 2, 3, 4]); +select arrayFilter(x -> x % 2, [1, NULL, 3, NULL]); From 4c945d7fe58c68134918c95d3eb2eccd80a1f604 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 12 May 2022 16:07:58 +0000 Subject: [PATCH 116/615] Fix --- src/Functions/array/FunctionArrayMapped.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index f4f1d39d07f..0af68910b70 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -331,7 +331,6 @@ public: if (Impl::needBoolean()) { - toColumnTypeName /// If result column is Nothing or Nullable(Nothing), just create const UInt8 column with 0 value. 
if (isNothing(removeNullable(lambda_result.type))) { From 83c2ee8c8b41ba028e1efadf7ea278eb1e2cfeaa Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 13 May 2022 00:28:44 +0800 Subject: [PATCH 117/615] fix build --- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index ccd51110d72..0e55a090d2d 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1009,7 +1009,7 @@ void StorageWindowView::read( if (target_table_id.empty()) return; - auto storage = getTargetStorage(); + auto storage = getTargetTable(); auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto target_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot, local_context); From d1b5362250df0aacf150c27ecc97986f9ab78a33 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 12 May 2022 18:59:26 +0200 Subject: [PATCH 118/615] Fix tests --- tests/queries/0_stateless/01882_total_rows_approx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01882_total_rows_approx.sh b/tests/queries/0_stateless/01882_total_rows_approx.sh index fe0163919a7..26333f61692 100755 --- a/tests/queries/0_stateless/01882_total_rows_approx.sh +++ b/tests/queries/0_stateless/01882_total_rows_approx.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "create table data_01882 (key Int) Engine=MergeTree() part # thus check few times to be sure that this is not coincidence. 
for _ in {1..30}; do $CLICKHOUSE_CURL -vsS "${CLICKHOUSE_URL}&max_threads=1&default_format=Null&send_progress_in_http_headers=1&http_headers_progress_interval_ms=1" --data-binary @- <<< "select * from data_01882" |& { - grep -F '"total_rows_to_read"' + grep -o -F '"total_rows_to_read":"10"' } | { # grep out final result grep -v -F '"read_rows":"10"' From 3a1f6f4fb22ab3e834c06b8644af962486e776c2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 12 May 2022 18:59:48 +0200 Subject: [PATCH 119/615] Delete 02295_type_nothing.sql --- .../0_stateless/02295_type_nothing.sql | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 tests/queries/0_stateless/02295_type_nothing.sql diff --git a/tests/queries/0_stateless/02295_type_nothing.sql b/tests/queries/0_stateless/02295_type_nothing.sql deleted file mode 100644 index 4848a3bb751..00000000000 --- a/tests/queries/0_stateless/02295_type_nothing.sql +++ /dev/null @@ -1,28 +0,0 @@ -select CAST(assumeNotNull(NULL), 'String') - -select toTypeName(assumeNotNull(NULL)); -select toColumnName(assumeNotNull(NULL)); -select toTypeName(assumeNotNull(materialize(NULL))); -select toColumnName(assumeNotNull(materialize(NULL))); - -select toTypeName([assumeNotNull(NULL)]); -select toColumnName([assumeNotNull(NULL)]); -select toTypeName([assumeNotNull(materialize(NULL))]); -select toColumnName([assumeNotNull(materialize(NULL))]); - -select toTypeName(map(1, assumeNotNull(NULL))); -select toColumnName(map(1, assumeNotNull(NULL))); -select toTypeName(map(1, assumeNotNull(materialize(NULL)))); -select toColumnName(map(1, assumeNotNull(materialize(NULL)))); - -select toTypeName(tuple(1, assumeNotNull(NULL))); -select toColumnName(tuple(1, assumeNotNull(NULL))); -select toTypeName(tuple(1, assumeNotNull(materialize(NULL)))); -select toColumnName(tuple(1, assumeNotNull(materialize(NULL)))); - -select toTypeName(assumeNotNull(NULL) * 2); -select toColumnName(assumeNotNull(NULL) * 
2); -select toTypeName(assumeNotNull(materialize(NULL)) * 2); -select toColumnName(assumeNotNull(materialize(NULL)) * 2); - - From d8580c8cb8e741f651d4ca5a2407d8e57a4eff07 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 12 May 2022 19:51:04 +0200 Subject: [PATCH 120/615] Digging --- src/Disks/DiskObjectStorage.cpp | 111 +++++++++++++++++++------------- src/Disks/DiskObjectStorage.h | 14 ++-- src/Disks/IObjectStorage.cpp | 12 ++++ src/Disks/IObjectStorage.h | 6 ++ src/Disks/S3/registerDiskS3.cpp | 10 ++- src/Disks/S3ObjectStorage.cpp | 26 +++++++- src/Disks/S3ObjectStorage.h | 11 ++-- 7 files changed, 133 insertions(+), 57 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 8fbde6dc6ca..04adebf1e82 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -569,16 +569,6 @@ void DiskObjectStorage::startup() LOG_INFO(log, "Starting up disk {}", name); object_storage->startup(); - if (send_metadata) - { - metadata_helper->restore(); - - if (metadata_helper->readSchemaVersion(remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) - metadata_helper->migrateToRestorableSchema(); - - metadata_helper->findLastRevision(); - } - LOG_INFO(log, "Disk {} started up", name); } @@ -674,6 +664,26 @@ void DiskObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration object_storage->applyNewSettings(config, "storage_configuration.disks." 
+ name, context_); } +void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + if (send_metadata) + { + LOG_DEBUG(log, "START RESTORING METADATA"); + metadata_helper->restore(config, config_prefix, context); + + if (metadata_helper->readSchemaVersion(object_storage.get(), remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) + { + LOG_DEBUG(log, "DONE READING"); + metadata_helper->migrateToRestorableSchema(); + LOG_DEBUG(log, "MIGRATION FINISHED"); + } + + LOG_DEBUG(log, "SEARCHING LAST REVISION"); + metadata_helper->findLastRevision(); + LOG_DEBUG(log, "DONE RESTORING METADATA"); + } +} + DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const { if (i != 0) @@ -750,14 +760,14 @@ void DiskObjectStorageMetadataHelper::findLastRevision() LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); } -int DiskObjectStorageMetadataHelper::readSchemaVersion(const String & source_path) const +int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const { const std::string path = source_path + SCHEMA_VERSION_OBJECT; int version = 0; - if (!disk->object_storage->exists(path)) + if (!object_storage->exists(path)) return version; - auto buf = disk->object_storage->readObject(path); + auto buf = object_storage->readObject(path); readIntText(version, *buf); return version; @@ -800,20 +810,22 @@ void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const S bool dir_contains_only_files = true; for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + { if (disk->isDirectory(it->path())) { dir_contains_only_files = false; break; } + } /// The whole directory can be migrated asynchronously. 
if (dir_contains_only_files) { auto result = disk->getExecutor().execute([this, path] - { - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - migrateFileToRestorableSchema(it->path()); - }); + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + migrateFileToRestorableSchema(it->path()); + }); results.push_back(std::move(result)); } @@ -863,15 +875,18 @@ void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() } } -void DiskObjectStorageMetadataHelper::restore() +void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { if (!disk->exists(RESTORE_FILE_NAME)) + { return; + } try { RestoreInformation information; information.source_path = disk->remote_fs_root_path; + information.source_namespace = disk->object_storage->getObjectsNamespace(); readRestoreInformation(information); if (information.revision == 0) @@ -879,19 +894,28 @@ void DiskObjectStorageMetadataHelper::restore() if (!information.source_path.ends_with('/')) information.source_path += '/'; - /// In this case we need to additionally cleanup S3 from objects with later revision. - /// Will be simply just restore to different path. - if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) - throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); + IObjectStorage * source_object_storage = disk->object_storage.get(); + if (information.source_namespace == disk->object_storage->getObjectsNamespace()) + { + /// In this case we need to additionally cleanup S3 from objects with later revision. + /// Will be simply just restore to different path. 
+ if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) + throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); - /// This case complicates S3 cleanup in case of unsuccessful restore. - if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) - throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + /// This case complicates S3 cleanup in case of unsuccessful restore. + if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) + throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + } + else + { + object_storage_from_another_namespace = disk->object_storage->cloneObjectStorage(information.source_namespace, config, config_prefix, context); + source_object_storage = object_storage_from_another_namespace.get(); + } LOG_INFO(disk->log, "Starting to restore disk {}. 
Revision: {}, Source path: {}", disk->name, information.revision, information.source_path); - if (readSchemaVersion(information.source_path) < RESTORABLE_SCHEMA_VERSION) + if (readSchemaVersion(source_object_storage, information.source_path) < RESTORABLE_SCHEMA_VERSION) throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); LOG_INFO(disk->log, "Removing old metadata..."); @@ -901,8 +925,8 @@ void DiskObjectStorageMetadataHelper::restore() if (disk->exists(root)) disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); - restoreFiles(information); - restoreFileOperations(information); + restoreFiles(source_object_storage, information); + restoreFileOperations(source_object_storage, information); disk->metadata_disk->removeFile(RESTORE_FILE_NAME); @@ -949,10 +973,12 @@ void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation for (const auto & [key, value] : properties) { - ReadBufferFromString value_buffer (value); + ReadBufferFromString value_buffer(value); if (key == "revision") readIntText(restore_information.revision, value_buffer); + else if (key == "source_bucket" || key == "source_namespace") + readText(restore_information.source_namespace, value_buffer); else if (key == "source_path") readText(restore_information.source_path, value_buffer); else if (key == "detached") @@ -988,12 +1014,12 @@ static std::tuple extractRevisionAndOperationFromKey(const Strin return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; } -void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & restore_information) +void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) { LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); std::vector> results; - auto restore_files = [this, &restore_information, &results](const BlobsPathToSize & keys) + auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) { std::vector keys_names; for (const auto & [key, size] : keys) @@ -1012,9 +1038,9 @@ void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & re if (!keys_names.empty()) { - auto result = disk->getExecutor().execute([this, &restore_information, keys_names]() + auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() { - processRestoreFiles(restore_information.source_path, keys_names); + processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); }); results.push_back(std::move(result)); @@ -1024,7 +1050,7 @@ void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & re }; BlobsPathToSize children; - disk->object_storage->listPrefix(restore_information.source_path, children); + source_object_storage->listPrefix(restore_information.source_path, children); restore_files(children); for (auto & result : results) @@ -1036,11 +1062,11 @@ void DiskObjectStorageMetadataHelper::restoreFiles(const RestoreInformation & re } -void DiskObjectStorageMetadataHelper::processRestoreFiles(const String & source_path, std::vector keys) +void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) { for (const auto & key : keys) { - auto meta = 
disk->object_storage->getObjectMetadata(key); + auto meta = source_object_storage->getObjectMetadata(key); auto object_attributes = meta.attributes; String path; @@ -1066,7 +1092,7 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(const String & source_ /// Copy object if we restore to different bucket / path. if (disk->remote_fs_root_path != source_path) - disk->object_storage->copyObject(key, disk->remote_fs_root_path + relative_key); + source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) { @@ -1088,13 +1114,13 @@ static String pathToDetached(const String & source_path) return fs::path(source_path).parent_path() / "detached/"; } -void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInformation & restore_information) +void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) { /// Enable recording file operations if we restore to different bucket / path. 
- bool send_metadata = disk->remote_fs_root_path != restore_information.source_path; + bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; std::set renames; - auto restore_file_operations = [this, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) { const String rename = "rename"; const String hardlink = "hardlink"; @@ -1117,7 +1143,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInforma if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(disk->object_storage->getObjectMetadata(key).attributes); + auto object_attributes = *(source_object_storage->getObjectMetadata(key).attributes); if (operation == rename) { auto from_path = object_attributes["from_path"]; @@ -1180,7 +1206,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInforma }; BlobsPathToSize children; - disk->object_storage->listPrefix(restore_information.source_path + "operations/", children); + source_object_storage->listPrefix(restore_information.source_path + "operations/", children); restore_file_operations(children); if (restore_information.detached) @@ -1224,5 +1250,4 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(const RestoreInforma LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); } - } diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 2147f9527d5..7e5d30dfea2 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -164,6 +164,7 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; + void restoreMetadataIfNeeded(const 
Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); private: const String name; const String remote_fs_root_path; @@ -284,6 +285,7 @@ public: struct RestoreInformation { UInt64 revision = LATEST_REVISION; + String source_namespace; String source_path; bool detached = false; }; @@ -293,18 +295,18 @@ public: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; void findLastRevision(); - int readSchemaVersion(const String & source_path) const; + int readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const; void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; void migrateToRestorableSchemaRecursive(const String & path, Futures & results); void migrateToRestorableSchema(); - void restore(); + void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void readRestoreInformation(RestoreInformation & restore_information); - void restoreFiles(const RestoreInformation & restore_information); - void processRestoreFiles(const String & source_path, std::vector keys); - void restoreFileOperations(const RestoreInformation & restore_information); + void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); + void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys); + void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); std::atomic revision_counter = 0; inline static const String RESTORE_FILE_NAME = "restore"; @@ -318,6 +320,8 @@ public: DiskObjectStorage * disk; + ObjectStoragePtr object_storage_from_another_namespace; + ReadSettings read_settings; }; diff --git 
a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp index ac8f3fc39e8..44b9430172b 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/IObjectStorage.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -34,4 +35,15 @@ void IObjectStorage::removeFromCache(const std::string & path) } } +void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +{ + if (&object_storage_to == this) + copyObject(object_from, object_to, object_to_attributes); + + auto in = readObject(object_from); + auto out = object_storage_to.writeObject(object_to); + copyData(*in, *out); + out->finalize(); +} + } diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h index f2cc9b90294..6a66ffb622e 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/IObjectStorage.h @@ -97,6 +97,8 @@ public: virtual void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + virtual void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}); + virtual ~IObjectStorage() = default; std::string getCacheBasePath() const; @@ -113,6 +115,10 @@ public: virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + virtual String getObjectsNamespace() const = 0; + + virtual std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + protected: FileCachePtr cache; }; diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 6a052dfab02..54b736788fa 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -79,7 
+79,7 @@ void registerDiskS3(DiskFactory & factory) getSettings(config, config_prefix, context), uri.version_id, uri.bucket); - std::shared_ptr s3disk = std::make_shared( + std::shared_ptr s3disk = std::make_shared( name, uri.key, "DiskS3", @@ -98,6 +98,9 @@ void registerDiskS3(DiskFactory & factory) s3disk->startup(); + s3disk->restoreMetadataIfNeeded(config, config_prefix, context); + + std::shared_ptr disk_result = s3disk; #ifdef NDEBUG bool use_cache = true; @@ -110,10 +113,11 @@ void registerDiskS3(DiskFactory & factory) if (config.getBool(config_prefix + ".cache_enabled", use_cache)) { String cache_path = config.getString(config_prefix + ".cache_path", context->getPath() + "disks/" + name + "/cache/"); - s3disk = wrapWithCache(s3disk, "s3-cache", cache_path, metadata_path); + disk_result = wrapWithCache(disk_result, "s3-cache", cache_path, metadata_path); } - return std::make_shared(s3disk); + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "DONE DISK"); + return std::make_shared(disk_result); }; factory.registerDiskType("s3", creator); } diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index a941022a574..0a7bd45d546 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -81,11 +81,15 @@ bool S3ObjectStorage::exists(const std::string & path) const auto object_head = requestObjectHeadData(bucket, path); if (!object_head.IsSuccess()) { - if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT DOESNT {} EXISTS", path); return false; + } throwIfError(object_head); } + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT {} EXISTS", path); return true; } @@ -291,6 +295,15 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons return result; } +void S3ObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const 
std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +{ + /// Shortcut for S3 + if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) + copyObjectImpl(bucket, object_from, dest_s3->bucket, object_to, {}, object_to_attributes); + else + IObjectStorage::copyObjectToAnotherObjectStorage(object_from, object_to, object_storage_to, object_to_attributes); +} + void S3ObjectStorage::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, std::optional head, std::optional metadata) const @@ -428,7 +441,7 @@ void S3ObjectStorage::startup() auto client_ptr = client.get(); /// Need to be enabled if it was disabled during shutdown() call. - const_cast(*client_ptr.get()).EnableRequestProcessing(); + const_cast(*client_ptr).EnableRequestProcessing(); } void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) @@ -437,6 +450,15 @@ void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & client.set(getClient(config, config_prefix, context)); } +std::unique_ptr S3ObjectStorage::cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + return std::make_unique( + nullptr, getClient(config, config_prefix, context), + getSettings(config, config_prefix, context), + version_id, new_namespace); } +} + + #endif diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h index b0762d07535..7632a643130 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -17,7 +17,6 @@ namespace DB struct S3ObjectStorageSettings { - S3ObjectStorageSettings() = default; S3ObjectStorageSettings( @@ -95,9 +94,7 @@ public: void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) 
override; - void setNewSettings(std::unique_ptr && s3_settings_); - - void setNewClient(std::unique_ptr && client_); + void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}) override; void shutdown() override; @@ -105,7 +102,13 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + String getObjectsNamespace() const override { return bucket; } + + std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; private: + void setNewSettings(std::unique_ptr && s3_settings_); + + void setNewClient(std::unique_ptr && client_); void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, std::optional head = std::nullopt, From 03a7f7c4bdd2f72a581b5522948ede4a39199362 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Fri, 13 May 2022 08:43:42 +0900 Subject: [PATCH 121/615] disallow null characters in custom alphabet --- src/Functions/FunctionHashID.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index a593d41a9a1..a8cee0372ff 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -4,17 +4,17 @@ #if USE_HASHIDSXX -#include +# include -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include -#include -#include +# include +# include namespace DB { @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int INVALID_ALPHABET; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int 
TOO_FEW_ARGUMENTS_FOR_FUNCTION; } @@ -114,7 +115,11 @@ public: { const auto & alphabetcolumn = arguments[3].column; if (auto alpha_col = checkAndGetColumnConst(alphabetcolumn.get())) + { alphabet = alpha_col->getValue(); + if (alphabet.find("\0") != std::string::npos) + throw Exception(ErrorCodes::INVALID_ALPHABET, "Custom alphabet must not contain null character"); + } } else alphabet.assign(DEFAULT_ALPHABET); From 62a1e1c0cdda88187f55af574e1939880e69804d Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Fri, 13 May 2022 09:58:14 +0900 Subject: [PATCH 122/615] use existing error code --- src/Functions/FunctionHashID.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index a8cee0372ff..9739d450af2 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -21,9 +21,9 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int INVALID_ALPHABET; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } @@ -118,7 +118,7 @@ public: { alphabet = alpha_col->getValue(); if (alphabet.find("\0") != std::string::npos) - throw Exception(ErrorCodes::INVALID_ALPHABET, "Custom alphabet must not contain null character"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character"); } } else From 9599c1f05ca8e8a82440cc748ea1231c5dac377b Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Fri, 13 May 2022 19:01:20 +0900 Subject: [PATCH 123/615] use single-character find for bad alphabet --- src/Functions/FunctionHashID.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 9739d450af2..8cc789d3680 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -117,7 +117,7 @@ public: if 
(auto alpha_col = checkAndGetColumnConst(alphabetcolumn.get())) { alphabet = alpha_col->getValue(); - if (alphabet.find("\0") != std::string::npos) + if (alphabet.find('\0') != std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character"); } } From 3ecd9f972a7105d2d4fc8e6f7e2ac6ccf44efd41 Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 13 May 2022 12:51:51 +0000 Subject: [PATCH 124/615] alter table support for windowview --- src/Storages/WindowView/StorageWindowView.cpp | 223 ++++++++++++------ src/Storages/WindowView/StorageWindowView.h | 25 +- src/Storages/WindowView/WindowViewSource.h | 2 + 3 files changed, 165 insertions(+), 85 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 1319c864b7b..de7543a8f67 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -452,6 +453,65 @@ bool StorageWindowView::optimize( return getInnerStorage()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } +void StorageWindowView::alter( + const AlterCommands & params, + ContextPtr local_context, + AlterLockHolder &) +{ + auto table_id = getStorageID(); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); + params.apply(new_metadata, local_context); + + const auto & new_select = new_metadata.select; + const auto & new_select_query = new_metadata.select.inner_query; + + auto old_inner_table_id = inner_table_id; + + modifying_query = true; + shutdown(); + + auto inner_query = initInnerQuery(new_select_query->as(), local_context); + + dropInnerTableIfAny(true, getContext()); + + /// create inner table + std::exchange(has_inner_table, true); + auto create_context = 
Context::createCopy(local_context); + auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id); + InterpreterCreateQuery create_interpreter(inner_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + DatabaseCatalog::instance().addDependency(select_table_id, table_id); + DatabaseCatalog::instance().updateDependency(old_inner_table_id, table_id, inner_table_id, table_id); + + shutdown_called = false; + + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); + fire_task = getContext()->getSchedulePool().createTask( + getStorageID().getFullTableName(), [this] { is_proctime ? threadFuncFireProc() : threadFuncFireEvent(); }); + clean_cache_task->deactivate(); + fire_task->deactivate(); + + new_metadata.setSelectQuery(new_select); + + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); + setInMemoryMetadata(new_metadata); + + startup(); + modifying_query = false; +} + +void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const +{ + for (const auto & command : commands) + { + if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_QUERY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); + } +} + std::pair StorageWindowView::getNewBlocks(UInt32 watermark) { UInt32 w_start = addTime(watermark, window_kind, -window_num_units, *time_zone); @@ -600,13 +660,12 @@ inline void StorageWindowView::fire(UInt32 watermark) } } -std::shared_ptr StorageWindowView::getInnerTableCreateQuery( - const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name) +ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, const StorageID & inner_table_id) { /// We will create a 
query to create an internal table. auto inner_create_query = std::make_shared(); - inner_create_query->setDatabase(database_name); - inner_create_query->setTable(table_name); + inner_create_query->setDatabase(inner_table_id.getDatabaseName()); + inner_create_query->setTable(inner_table_id.getTableName()); Aliases aliases; QueryAliasesVisitor(aliases).visit(inner_query); @@ -684,33 +743,34 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( }; auto new_storage = std::make_shared(); - /// storage != nullptr in case create window view with ENGINE syntax - if (storage) + /// inner_storage_engine != nullptr in case create window view with ENGINE syntax + if (inner_table_engine) { - new_storage->set(new_storage->engine, storage->engine->clone()); + auto storage = inner_table_engine->as(); + new_storage->set(new_storage->engine, storage.engine->clone()); - if (storage->ttl_table) + if (storage.ttl_table) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "TTL is not supported for inner table in Window View"); - if (!endsWith(storage->engine->name, "MergeTree")) + if (!endsWith(storage.engine->name, "MergeTree")) throw Exception( ErrorCodes::INCORRECT_QUERY, "The ENGINE of WindowView must be MergeTree family of table engines " "including the engines with replication support"); - if (storage->partition_by) - new_storage->set(new_storage->partition_by, visit(storage->partition_by)); - if (storage->primary_key) - new_storage->set(new_storage->primary_key, visit(storage->primary_key)); - if (storage->order_by) - new_storage->set(new_storage->order_by, visit(storage->order_by)); - if (storage->sample_by) - new_storage->set(new_storage->sample_by, visit(storage->sample_by)); + if (storage.partition_by) + new_storage->set(new_storage->partition_by, visit(storage.partition_by)); + if (storage.primary_key) + new_storage->set(new_storage->primary_key, visit(storage.primary_key)); + if (storage.order_by) + new_storage->set(new_storage->order_by, 
visit(storage.order_by)); + if (storage.sample_by) + new_storage->set(new_storage->sample_by, visit(storage.sample_by)); - if (storage->settings) - new_storage->set(new_storage->settings, storage->settings->clone()); + if (storage.settings) + new_storage->set(new_storage->settings, storage.settings->clone()); } else { @@ -969,6 +1029,7 @@ StorageWindowView::StorageWindowView( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) + , clean_interval_ms(context_->getSettingsRef().window_view_clean_interval.totalMilliseconds()) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -982,11 +1043,47 @@ StorageWindowView::StorageWindowView( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for {}", getName()); - select_query = query.select->list_of_selects->children.at(0)->clone(); + /// Extract information about watermark, lateness. 
+ eventTimeParser(query); + + target_table_id = query.to_table_id; + + if (query.storage) + inner_table_engine = query.storage->clone(); + + auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as(), context_); + + if(is_proctime) + next_fire_signal = getWindowUpperBound(std::time(nullptr)); + + std::exchange(has_inner_table, true); + if (!attach_) + { + auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id); + auto create_context = Context::createCopy(context_); + InterpreterCreateQuery create_interpreter(inner_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + } + + DatabaseCatalog::instance().addDependency(select_table_id, getStorageID()); + + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); + fire_task = getContext()->getSchedulePool().createTask( + getStorageID().getFullTableName(), [this] { is_proctime ? 
threadFuncFireProc() : threadFuncFireEvent(); }); + clean_cache_task->deactivate(); + fire_task->deactivate(); +} + +ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr context_) +{ + select_query = query.clone(); + sample_block.clear(); + String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; - auto select_query_tmp = select_query->clone(); - extractDependentTable(getContext(), select_query_tmp, select_database_name, select_table_name); + auto select_query_tmp = query.clone(); + extractDependentTable(context_, select_query_tmp, select_database_name, select_table_name); /// If the table is not specified - use the table `system.one` if (select_table_name.empty()) @@ -995,77 +1092,40 @@ StorageWindowView::StorageWindowView( select_table_name = "one"; } select_table_id = StorageID(select_database_name, select_table_name); - DatabaseCatalog::instance().addDependency(select_table_id, table_id_); /// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID. 
- auto inner_query = innerQueryParser(select_query->as()); + auto inner_query = innerQueryParser(query); - // Parse mergeable query + /// Parse mergeable query mergeable_query = inner_query->clone(); ReplaceFunctionNowData func_now_data; ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query); is_time_column_func_now = func_now_data.is_time_column_func_now; + if (!is_proctime && is_time_column_func_now) + throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY); if (is_time_column_func_now) window_id_name = func_now_data.window_id_name; - // Parse final query (same as mergeable query but has tumble/hop instead of windowID) + /// Parse final query (same as mergeable query but has tumble/hop instead of windowID) final_query = mergeable_query->clone(); ReplaceWindowIdMatcher::Data final_query_data; - if (is_tumble) - final_query_data.window_name = "tumble"; - else - final_query_data.window_name = "hop"; + final_query_data.window_name = is_tumble ? "tumble" : "hop"; ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query); - is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; - is_watermark_ascending = query.is_watermark_ascending; - is_watermark_bounded = query.is_watermark_bounded; - target_table_id = query.to_table_id; + window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), is_tumble ? "tumble" : "hop"); - /// Extract information about watermark, lateness. - eventTimeParser(query); - - if (is_tumble) - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "tumble"); - else - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "hop"); - - auto generate_inner_table_name = [](const StorageID & storage_id) + auto generate_inner_table_id = [](const StorageID & storage_id) { - if (storage_id.hasUUID()) - return ".inner." + toString(storage_id.uuid); - return ".inner." 
+ storage_id.table_name; + StorageID table_id = StorageID::createEmpty(); + table_id.database_name = storage_id.database_name; + table_id.table_name = ".inner." + (storage_id.hasUUID() ? toString(storage_id.uuid) : storage_id.table_name); + return table_id; }; + inner_table_id = generate_inner_table_id(getStorageID()); - if (attach_) - { - inner_table_id = StorageID(table_id_.database_name, generate_inner_table_name(table_id_)); - } - else - { - auto inner_create_query - = getInnerTableCreateQuery(inner_query, query.storage, table_id_.database_name, generate_inner_table_name(table_id_)); - - auto create_context = Context::createCopy(context_); - InterpreterCreateQuery create_interpreter(inner_create_query, create_context); - create_interpreter.setInternal(true); - create_interpreter.execute(); - inner_table_id = StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()); - } - - clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds(); - next_fire_signal = getWindowUpperBound(std::time(nullptr)); - - clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); - if (is_proctime) - fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); }); - else - fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); }); - clean_cache_task->deactivate(); - fire_task->deactivate(); + return inner_query; } - ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) { if (!query.groupBy()) @@ -1127,13 +1187,16 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) { + watermark_num_units = 0; + lateness_num_units = 0; + is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; + is_watermark_ascending = 
query.is_watermark_ascending; + is_watermark_bounded = query.is_watermark_bounded; + if (query.is_watermark_strictly_ascending || query.is_watermark_ascending || query.is_watermark_bounded) { is_proctime = false; - if (is_time_column_func_now) - throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY); - if (query.is_watermark_ascending) { is_watermark_bounded = true; @@ -1147,6 +1210,8 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) "Illegal type WATERMARK function should be Interval"); } } + else + is_proctime = true; if (query.allowed_lateness) { @@ -1155,11 +1220,16 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) query.lateness_function, lateness_kind, lateness_num_units, "Illegal type ALLOWED_LATENESS function should be Interval"); } + else + allowed_lateness = false; } void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { + while (window_view.modifying_query) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; @@ -1340,6 +1410,7 @@ void StorageWindowView::shutdown() { std::lock_guard lock(mutex); fire_condition.notify_all(); + fire_signal_condition.notify_all(); } clean_cache_task->deactivate(); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 782e8f2b899..e19260ef7e0 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -134,6 +134,10 @@ public: const Names & deduplicate_by_columns, ContextPtr context) override; + void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; + + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; + void startup() override; void 
shutdown() override; @@ -161,10 +165,11 @@ private: /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *) ASTPtr final_query; - bool is_proctime{true}; + bool is_proctime; bool is_time_column_func_now; bool is_tumble; // false if is hop std::atomic shutdown_called{false}; + std::atomic modifying_query{false}; bool has_inner_table{true}; mutable Block sample_block; UInt64 clean_interval_ms; @@ -172,10 +177,10 @@ private: UInt32 max_timestamp = 0; UInt32 max_watermark = 0; // next watermark to fire UInt32 max_fired_watermark = 0; - bool is_watermark_strictly_ascending{false}; - bool is_watermark_ascending{false}; - bool is_watermark_bounded{false}; - bool allowed_lateness{false}; + bool is_watermark_strictly_ascending; + bool is_watermark_ascending; + bool is_watermark_bounded; + bool allowed_lateness; UInt32 next_fire_signal; std::deque fire_signal; std::list> watch_streams; @@ -195,8 +200,8 @@ private: Int64 window_num_units; Int64 hop_num_units; Int64 slice_num_units; - Int64 watermark_num_units = 0; - Int64 lateness_num_units = 0; + Int64 watermark_num_units; + Int64 lateness_num_units; Int64 slide_num_units; String window_id_name; String window_id_alias; @@ -207,6 +212,8 @@ private: StorageID target_table_id = StorageID::createEmpty(); StorageID inner_table_id = StorageID::createEmpty(); + ASTPtr inner_table_engine; + BackgroundSchedulePool::TaskHolder clean_cache_task; BackgroundSchedulePool::TaskHolder fire_task; @@ -215,9 +222,8 @@ private: ASTPtr innerQueryParser(const ASTSelectQuery & query); void eventTimeParser(const ASTCreateQuery & query); + ASTPtr initInnerQuery(ASTSelectQuery query, ContextPtr context); - std::shared_ptr getInnerTableCreateQuery( - const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name); UInt32 getCleanupBound(); ASTPtr getCleanupQuery(); @@ -235,6 +241,7 @@ private: ASTPtr getFinalQuery() const { return 
final_query->clone(); } ASTPtr getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const; + ASTPtr getInnerTableCreateQuery(const ASTPtr & inner_query, const StorageID & inner_table_id); StoragePtr getParentStorage() const; diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index a726cdc8712..7b914933035 100644 --- a/src/Storages/WindowView/WindowViewSource.h +++ b/src/Storages/WindowView/WindowViewSource.h @@ -51,6 +51,8 @@ protected: Block block; UInt32 watermark; std::tie(block, watermark) = generateImpl(); + if (!block) + return Chunk(); if (is_events) { return Chunk( From 121fd26ab179a0e5b4f6fe4c3ad3ff6be231929c Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 13 May 2022 13:10:34 +0000 Subject: [PATCH 125/615] add test --- ...01076_window_view_alter_query_to.reference | 9 +++ .../01076_window_view_alter_query_to.sh | 47 +++++++++++ ...iew_alter_query_to_modify_source.reference | 9 +++ ...indow_view_alter_query_to_modify_source.sh | 50 ++++++++++++ .../01078_window_view_alter_query_watch.py | 78 +++++++++++++++++++ ...78_window_view_alter_query_watch.reference | 0 6 files changed, 193 insertions(+) create mode 100644 tests/queries/0_stateless/01076_window_view_alter_query_to.reference create mode 100755 tests/queries/0_stateless/01076_window_view_alter_query_to.sh create mode 100644 tests/queries/0_stateless/01077_window_view_alter_query_to_modify_source.reference create mode 100755 tests/queries/0_stateless/01077_window_view_alter_query_to_modify_source.sh create mode 100755 tests/queries/0_stateless/01078_window_view_alter_query_watch.py create mode 100644 tests/queries/0_stateless/01078_window_view_alter_query_watch.reference diff --git a/tests/queries/0_stateless/01076_window_view_alter_query_to.reference b/tests/queries/0_stateless/01076_window_view_alter_query_to.reference new file mode 100644 index 00000000000..fa8f7331398 --- /dev/null +++ 
b/tests/queries/0_stateless/01076_window_view_alter_query_to.reference @@ -0,0 +1,9 @@ +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +----ALTER TABLE...MODIFY QUERY---- +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 12 1990-01-01 12:00:15 +1 14 1990-01-01 12:00:15 diff --git a/tests/queries/0_stateless/01076_window_view_alter_query_to.sh b/tests/queries/0_stateless/01076_window_view_alter_query_to.sh new file mode 100755 index 00000000000..959a0b9e1e2 --- /dev/null +++ b/tests/queries/0_stateless/01076_window_view_alter_query_to.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < Date: Fri, 13 May 2022 13:26:38 +0000 Subject: [PATCH 126/615] update test --- ...065_window_view_event_hop_watch_bounded.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 92d2b56ed34..7cee8081352 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -26,32 +26,32 @@ with client(name="client1>", log=log) as client1, client( client2.expect(prompt) client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) + client1.expect("Ok.") client1.send("DROP TABLE IF EXISTS test.wv") - client1.expect(prompt) + client1.expect("Ok.") client1.send("DROP TABLE IF EXISTS `.inner.wv`") - client1.expect(prompt) + client1.expect("Ok.") client1.send( "CREATE TABLE test.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) - client1.expect(prompt) + client1.expect("Ok.") client1.send( "CREATE WINDOW VIEW test.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, 
hopEnd(wid) AS w_end FROM test.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) - client1.expect(prompt) + client1.expect("Ok.") client1.send("WATCH test.wv") client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:00');") - client2.expect(prompt) + client2.expect("Ok.") client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:05');") - client2.expect(prompt) - client1.expect("1*" + end_of_block) + client2.expect("Ok.") + client1.expect("1" + end_of_block) client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:06');") + client2.expect("Ok.") client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:10');") - client2.expect(prompt) - client1.expect("1*" + end_of_block) - client1.expect("2*" + end_of_block) + client2.expect("Ok.") + client1.expect("2" + end_of_block) # send Ctrl-C client1.send("\x03", eol="") From 930939ef7566d9a43ea70380a668de17d89accab Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 13 May 2022 13:38:53 +0000 Subject: [PATCH 127/615] update code style --- src/Storages/WindowView/StorageWindowView.cpp | 1 + .../01078_window_view_alter_query_watch.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index de7543a8f67..18639d21c78 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -66,6 +66,7 @@ namespace ErrorCodes extern const int QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW; extern const int SUPPORT_IS_DISABLED; extern const int TABLE_WAS_NOT_DROPPED; + extern const int NOT_IMPLEMENTED; } namespace diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 12dc4b36152..8d1e7a6f667 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ 
b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -44,9 +44,13 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) - client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:00');") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:00');" + ) client2.expect("Ok.") - client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:06');") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:06');" + ) client2.expect("Ok.") client1.expect("1*" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") @@ -57,9 +61,13 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) - client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:11');") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:11');" + ) client2.expect("Ok.") - client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:18');") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:18');" + ) client2.expect("Ok.") client1.expect("2*" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") From b17fec659aa0bb1f27c7a7d5296bd7855b2bf09c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 13 May 2022 13:51:28 +0000 Subject: [PATCH 128/615] Improve performance and memory usage for select of subset of columns for some formats --- src/Core/Settings.h | 2 +- src/Formats/EscapingRuleUtils.cpp | 12 ++--- src/Formats/FormatFactory.cpp | 8 ++-- src/Formats/FormatFactory.h | 6 +-- src/Formats/registerWithNamesAndTypes.cpp | 6 +++ 
src/Formats/registerWithNamesAndTypes.h | 3 ++ src/IO/ReadHelpers.cpp | 44 +++++++++++-------- src/IO/ReadHelpers.h | 5 ++- .../Formats/Impl/ArrowBlockInputFormat.cpp | 2 +- .../Formats/Impl/BinaryRowInputFormat.cpp | 1 + .../Formats/Impl/CSVRowInputFormat.cpp | 5 ++- .../Formats/Impl/CapnProtoRowInputFormat.cpp | 1 + .../Impl/CustomSeparatedRowInputFormat.cpp | 1 + .../Impl/JSONCompactEachRowRowInputFormat.cpp | 1 + .../Impl/JSONEachRowRowInputFormat.cpp | 5 +++ .../Formats/Impl/MySQLDumpRowInputFormat.cpp | 6 +-- .../Formats/Impl/ORCBlockInputFormat.cpp | 2 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- .../Formats/Impl/ProtobufListInputFormat.cpp | 2 +- .../Formats/Impl/ProtobufRowInputFormat.cpp | 1 + .../Formats/Impl/TSKVRowInputFormat.cpp | 2 + .../Impl/TabSeparatedRowInputFormat.cpp | 8 +++- .../Formats/Impl/ValuesBlockInputFormat.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 6 +-- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/Hive/StorageHive.cpp | 6 +-- src/Storages/Hive/StorageHive.h | 2 +- src/Storages/IStorage.h | 2 +- src/Storages/StorageFile.cpp | 10 ++--- src/Storages/StorageFile.h | 6 +-- src/Storages/StorageS3.cpp | 6 +-- src/Storages/StorageS3.h | 2 +- src/Storages/StorageURL.cpp | 8 ++-- src/Storages/StorageURL.h | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/StorageXDBC.h | 2 +- .../performance/formats_columns_sampling.xml | 33 ++++++++++++++ 37 files changed, 145 insertions(+), 71 deletions(-) create mode 100644 tests/performance/formats_columns_sampling.xml diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2e6d657698c..4c654a3e842 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -631,7 +631,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices 
\\N", 0) \ M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted.)", 0) \ - M(Bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, -WithNames, -WithNamesAndTypes and TSKV formats).", 0) \ + M(Bool, input_format_skip_unknown_fields, true, "Skip columns with unknown names from input data (it works for JSONEachRow, -WithNames, -WithNamesAndTypes and TSKV formats).", 0) \ M(Bool, input_format_with_names_use_header, true, "For -WithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \ M(Bool, input_format_with_types_use_header, true, "For -WithNamesAndTypes input formats this controls whether format parser should check if data types from the input match data types from the header.", 0) \ M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 1875caf1855..3a30d6dc63f 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -71,7 +71,7 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule) void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) { - String tmp; + NullOutput out; constexpr const char * field_name = ""; constexpr size_t field_name_len = 16; switch (escaping_rule) @@ -80,19 +80,19 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca /// Empty field, just skip spaces break; case FormatSettings::EscapingRule::Escaped: - readEscapedString(tmp, buf); + readEscapedStringInto(out, 
buf); break; case FormatSettings::EscapingRule::Quoted: - readQuotedFieldIntoString(tmp, buf); + readQuotedFieldInto(out, buf); break; case FormatSettings::EscapingRule::CSV: - readCSVString(tmp, buf, format_settings.csv); + readCSVStringInto(out, buf, format_settings.csv); break; case FormatSettings::EscapingRule::JSON: skipJSONField(buf, StringRef(field_name, field_name_len)); break; case FormatSettings::EscapingRule::Raw: - readString(tmp, buf); + readStringInto(out, buf); break; default: __builtin_unreachable(); @@ -219,7 +219,7 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin if constexpr (read_string) readQuotedString(result, buf); else - readQuotedFieldIntoString(result, buf); + readQuotedField(result, buf); break; case FormatSettings::EscapingRule::JSON: if constexpr (read_string) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 96b52cd2423..a05ba2e1f38 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -538,19 +538,19 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na } -void FormatFactory::markFormatAsColumnOriented(const String & name) +void FormatFactory::markFormatSupportsSamplingColumns(const String & name) { - auto & target = dict[name].is_column_oriented; + auto & target = dict[name].supports_sampling_columns; if (target) throw Exception("FormatFactory: Format " + name + " is already marked as column oriented", ErrorCodes::LOGICAL_ERROR); target = true; } -bool FormatFactory::checkIfFormatIsColumnOriented(const String & name) +bool FormatFactory::checkIfFormatSupportsSamplingColumns(const String & name) { const auto & target = getCreators(name); - return target.is_column_oriented; + return target.supports_sampling_columns; } bool FormatFactory::isInputFormat(const String & name) const diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index f7d3c23d3b4..5d8c2e133f2 100644 --- 
a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -108,7 +108,7 @@ private: SchemaReaderCreator schema_reader_creator; ExternalSchemaReaderCreator external_schema_reader_creator; bool supports_parallel_formatting{false}; - bool is_column_oriented{false}; + bool supports_sampling_columns{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; AppendSupportChecker append_support_checker; }; @@ -194,9 +194,9 @@ public: void registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator); void markOutputFormatSupportsParallelFormatting(const String & name); - void markFormatAsColumnOriented(const String & name); + void markFormatSupportsSamplingColumns(const String & name); - bool checkIfFormatIsColumnOriented(const String & name); + bool checkIfFormatSupportsSamplingColumns(const String & name); bool checkIfFormatHasSchemaReader(const String & name); bool checkIfFormatHasExternalSchemaReader(const String & name); diff --git a/src/Formats/registerWithNamesAndTypes.cpp b/src/Formats/registerWithNamesAndTypes.cpp index cba578b08c7..81a711e736b 100644 --- a/src/Formats/registerWithNamesAndTypes.cpp +++ b/src/Formats/registerWithNamesAndTypes.cpp @@ -10,4 +10,10 @@ void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWit register_func(base_format_name + "WithNamesAndTypes", true, true); } +void markFormatWithNamesAndTypesSupportsSamplingColumns(const std::string & base_format_name, FormatFactory & factory) +{ + factory.markFormatSupportsSamplingColumns(base_format_name + "WithNames"); + factory.markFormatSupportsSamplingColumns(base_format_name + "WithNamesAndTypes"); +} + } diff --git a/src/Formats/registerWithNamesAndTypes.h b/src/Formats/registerWithNamesAndTypes.h index d8e74e3421e..50a0eee9616 100644 --- a/src/Formats/registerWithNamesAndTypes.h +++ b/src/Formats/registerWithNamesAndTypes.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -9,4 
+10,6 @@ namespace DB using RegisterWithNamesAndTypesFunc = std::function; void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWithNamesAndTypesFunc register_func); +void markFormatWithNamesAndTypesSupportsSamplingColumns(const std::string & base_format_name, FormatFactory & factory); + } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 85c5ff2ec48..71ed197da3e 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -700,16 +700,18 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & if (!buf.hasPendingData()) continue; - /** CSV format can contain insignificant spaces and tabs. + if constexpr (!std::is_same_v) + { + /** CSV format can contain insignificant spaces and tabs. * Usually the task of skipping them is for the calling code. * But in this case, it will be difficult to do this, so remove the trailing whitespace by ourself. */ - size_t size = s.size(); - while (size > 0 - && (s[size - 1] == ' ' || s[size - 1] == '\t')) - --size; + size_t size = s.size(); + while (size > 0 && (s[size - 1] == ' ' || s[size - 1] == '\t')) + --size; - s.resize(size); + s.resize(size); + } return; } } @@ -741,6 +743,7 @@ void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & sett } template void readCSVStringInto>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +template void readCSVStringInto(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template @@ -1313,8 +1316,8 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim } // Use PeekableReadBuffer to copy field to string after parsing. 
-template -static void readParsedValueIntoString(String & s, ReadBuffer & buf, ParseFunc parse_func) +template +static void readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func) { PeekableReadBuffer peekable_buf(buf); peekable_buf.setCheckpoint(); @@ -1326,8 +1329,8 @@ static void readParsedValueIntoString(String & s, ReadBuffer & buf, ParseFunc pa peekable_buf.position() = end; } -template -static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf) +template +static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) { assertChar(opening_bracket, buf); s.push_back(opening_bracket); @@ -1363,10 +1366,9 @@ static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf) } } -void readQuotedFieldIntoString(String & s, ReadBuffer & buf) +template +void readQuotedFieldInto(Vector & s, ReadBuffer & buf) { - s.clear(); - if (buf.eof()) return; @@ -1386,11 +1388,11 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) s.push_back('\''); } else if (*buf.position() == '[') - readQuotedFieldInBrackets<'[', ']'>(s, buf); + readQuotedFieldInBracketsInto<'[', ']'>(s, buf); else if (*buf.position() == '(') - readQuotedFieldInBrackets<'(', ')'>(s, buf); + readQuotedFieldInBracketsInto<'(', ')'>(s, buf); else if (*buf.position() == '{') - readQuotedFieldInBrackets<'{', '}'>(s, buf); + readQuotedFieldInBracketsInto<'{', '}'>(s, buf); else if (checkCharCaseInsensitive('n', buf)) { /// NULL or NaN @@ -1423,14 +1425,20 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) Float64 tmp; readFloatText(tmp, in); }; - readParsedValueIntoString(s, buf, parse_func); + readParsedValueInto(s, buf, parse_func); } } +void readQuotedField(String & s, ReadBuffer & buf) +{ + s.clear(); + readQuotedFieldInto(s, buf); +} + void readJSONFieldIntoString(String & s, ReadBuffer & buf) { auto parse_func = [](ReadBuffer & in) { skipJSONField(in, "json_field"); }; - readParsedValueIntoString(s, buf, parse_func); + readParsedValueInto(s, buf, 
parse_func); } } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index c5ffa52c9b3..ebf6e98d9a7 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1425,7 +1425,10 @@ struct PcgDeserializer } }; -void readQuotedFieldIntoString(String & s, ReadBuffer & buf); +template +void readQuotedFieldInto(Vector & s, ReadBuffer & buf); + +void readQuotedField(String & s, ReadBuffer & buf); void readJSONFieldIntoString(String & s, ReadBuffer & buf); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 07331d82bb8..3d9a536b565 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -188,7 +188,7 @@ void registerInputFormatArrow(FormatFactory & factory) { return std::make_shared(buf, sample, false, format_settings); }); - factory.markFormatAsColumnOriented("Arrow"); + factory.markFormatSupportsSamplingColumns("Arrow"); factory.registerInputFormat( "ArrowStream", [](ReadBuffer & buf, diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index d3de2fbf494..080c3173b17 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -114,6 +114,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) }; registerWithNamesAndTypes("RowBinary", register_func); + factory.markFormatSupportsSamplingColumns("RowBinaryWithNamesAndTypes"); factory.registerFileExtension("bin", "RowBinary"); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 9990c33f0bb..0eaa02c97cb 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -112,7 +112,9 @@ String CSVFormatReader::readCSVFieldIntoString() void CSVFormatReader::skipField() { - readCSVFieldIntoString(); + 
skipWhitespacesAndTabs(*in); + NullOutput out; + readCSVStringInto(out, *in, format_settings.csv); } void CSVFormatReader::skipRowEndDelimiter() @@ -374,6 +376,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory) }; registerWithNamesAndTypes("CSV", register_func); + markFormatWithNamesAndTypesSupportsSamplingColumns("CSV", factory); } void registerCSVSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 67743a04bf3..61394ef6d22 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -310,6 +310,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), FormatSchemaInfo(settings, "CapnProto", true), settings); }); + factory.markFormatSupportsSamplingColumns("CapnProto"); factory.registerFileExtension("capnp", "CapnProto"); } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 74c5fb1945a..56a639a0e30 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -333,6 +333,7 @@ void registerInputFormatCustomSeparated(FormatFactory & factory) }); }; registerWithNamesAndTypes(ignore_spaces ? "CustomSeparatedIgnoreSpaces" : "CustomSeparated", register_func); + markFormatWithNamesAndTypesSupportsSamplingColumns(ignore_spaces ? 
"CustomSeparatedIgnoreSpaces" : "CustomSeparated", factory); } } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 867b56c541b..d2f03011032 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -229,6 +229,7 @@ void registerInputFormatJSONCompactEachRow(FormatFactory & factory) }; registerWithNamesAndTypes(yield_strings ? "JSONCompactStringsEachRow" : "JSONCompactEachRow", register_func); + markFormatWithNamesAndTypesSupportsSamplingColumns(yield_strings ? "JSONCompactStringsEachRow" : "JSONCompactEachRow", factory); } } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 4fb7a40ebfc..bc820c61fca 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -393,6 +393,11 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) { return std::make_shared(buf, sample, std::move(params), settings, true); }); + + factory.markFormatSupportsSamplingColumns("JSONEachRow"); + factory.markFormatSupportsSamplingColumns("JSONLines"); + factory.markFormatSupportsSamplingColumns("NDJSON"); + factory.markFormatSupportsSamplingColumns("JSONStringsEachRow"); } void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 5f3f015a5b1..8e787edf8ab 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -397,8 +397,8 @@ bool MySQLDumpRowInputFormat::readField(IColumn & column, size_t column_idx) void MySQLDumpRowInputFormat::skipField() { - String tmp; - readQuotedFieldIntoString(tmp, *in); + NullOutput 
out; + readQuotedFieldInto(out, *in); } MySQLDumpSchemaReader::MySQLDumpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) @@ -434,7 +434,7 @@ DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() if (!data_types.empty()) skipFieldDelimiter(in); - readQuotedFieldIntoString(value, in); + readQuotedField(value, in); auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 1531c0d2794..0614eb3fc30 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -200,7 +200,7 @@ void registerInputFormatORC(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatAsColumnOriented("ORC"); + factory.markFormatSupportsSamplingColumns("ORC"); } void registerORCSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 86987c665e0..a49a7a4e0ca 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -193,7 +193,7 @@ void registerInputFormatParquet(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatAsColumnOriented("Parquet"); + factory.markFormatSupportsSamplingColumns("Parquet"); } void registerParquetSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 6fbcaa15536..91a8c10643e 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -79,7 +79,7 @@ void registerInputFormatProtobufList(FormatFactory & factory) 
return std::make_shared(buf, sample, std::move(params), FormatSchemaInfo(settings, "Protobuf", true), settings.protobuf.input_flatten_google_wrappers); }); - factory.markFormatAsColumnOriented("ProtobufList"); + factory.markFormatSupportsSamplingColumns("ProtobufList"); } void registerProtobufListSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 5c953a3fcc9..ed6222af4e1 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -69,6 +69,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) with_length_delimiter, settings.protobuf.input_flatten_google_wrappers); }); + factory.markFormatSupportsSamplingColumns(with_length_delimiter ? "Protobuf" : "ProtobufSingle"); } } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 26c7d1aced5..113f18f73b6 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -280,6 +280,8 @@ void registerInputFormatTSKV(FormatFactory & factory) { return std::make_shared(buf, sample, std::move(params), settings); }); + + factory.markFormatSupportsSamplingColumns("TSKV"); } void registerTSKVSchemaReader(FormatFactory & factory) { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 5f39c7bd646..0be8257f463 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -80,7 +80,11 @@ String TabSeparatedFormatReader::readFieldIntoString() void TabSeparatedFormatReader::skipField() { - readFieldIntoString(); + NullOutput out; + if (is_raw) + readStringInto(out, *in); + else + readEscapedStringInto(out, *in); } void 
TabSeparatedFormatReader::skipHeaderRow() @@ -347,6 +351,8 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); + markFormatWithNamesAndTypesSupportsSamplingColumns(is_raw ? "TSVRaw" : "TSV", factory); + markFormatWithNamesAndTypesSupportsSamplingColumns(is_raw ? "TabSeparatedRaw" : "TabSeparated", factory); } // We can use the same segmentation engine for TSKV. diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 727b7fb0a1f..a95d049f94a 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -599,7 +599,7 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() skipWhitespaceIfAny(buf); } - readQuotedFieldIntoString(value, buf); + readQuotedField(value, buf); auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0176487bbfe..5d17bcb567e 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -476,9 +476,9 @@ private: }; -bool StorageHDFS::isColumnOriented() const +bool StorageHDFS::supportsSamplingColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); } Pipe StorageHDFS::read( @@ -527,7 +527,7 @@ Pipe StorageHDFS::read( ColumnsDescription columns_description; Block block_for_format; - if (isColumnOriented()) + if (supportsSamplingColumns()) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git 
a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b431407eba5..28c23c366d4 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -57,7 +57,7 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. - bool isColumnOriented() const override; + bool supportsSamplingColumns() const override; static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 09c2f578419..63c3ce03945 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -622,14 +622,14 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( return hive_file; } -bool StorageHive::isColumnOriented() const +bool StorageHive::supportsSamplingColumns() const { return format_name == "Parquet" || format_name == "ORC"; } void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const { - if (!isColumnOriented()) + if (!supportsSamplingColumns()) sample_block = header_block; UInt32 erased_columns = 0; for (const auto & column : partition_columns) @@ -795,7 +795,7 @@ std::optional StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const { /// Row-based format like Text doesn't support totalRowsByPartitionPredicate - if (!isColumnOriented()) + if (!supportsSamplingColumns()) return {}; auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index d61bb184574..fdd74ecd7a2 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ 
-63,7 +63,7 @@ public: NamesAndTypesList getVirtuals() const override; - bool isColumnOriented() const override; + bool supportsSamplingColumns() const override; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index c667f464a13..131aed1bef0 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -585,7 +585,7 @@ public: /// Returns true if all disks of storage are read-only. virtual bool isStaticStorage() const; - virtual bool isColumnOriented() const { return false; } + virtual bool supportsSamplingColumns() const { return false; } /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. /// Used for: diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5b191b37f5e..5899959c753 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -316,9 +316,9 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); } -bool StorageFile::isColumnOriented() const +bool StorageFile::supportsSamplingColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); } StorageFile::StorageFile(int table_fd_, CommonArguments args) @@ -465,7 +465,7 @@ public: const ColumnsDescription & columns_description, const FilesInfoPtr & files_info) { - if (storage->isColumnOriented()) + if (storage->supportsSamplingColumns()) return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); else return getHeader(storage_snapshot->metadata, files_info->need_path_column, 
files_info->need_file_column); @@ -530,7 +530,7 @@ public: auto get_block_for_format = [&]() -> Block { - if (storage->isColumnOriented()) + if (storage->supportsSamplingColumns()) return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); return storage_snapshot->metadata->getSampleBlock(); }; @@ -690,7 +690,7 @@ Pipe StorageFile::read( { const auto get_columns_for_format = [&]() -> ColumnsDescription { - if (isColumnOriented()) + if (supportsSamplingColumns()) return ColumnsDescription{ storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; else diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 35ab185b14d..803e0466119 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -69,11 +69,11 @@ public: static Strings getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read); - /// Check if the format is column-oriented. - /// Is is useful because column oriented formats could effectively skip unknown columns + /// Check if the format supports reading only some sampling of columns. + /// Is is useful because such formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
- bool isColumnOriented() const override; + bool supportsSamplingColumns() const override; bool supportsPartitionBy() const override { return true; } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6107c1a5117..79bbd5f7254 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -676,9 +676,9 @@ std::shared_ptr StorageS3::createFileIterator( } } -bool StorageS3::isColumnOriented() const +bool StorageS3::supportsSamplingColumns() const { - return FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); + return FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); } Pipe StorageS3::read( @@ -707,7 +707,7 @@ Pipe StorageS3::read( ColumnsDescription columns_description; Block block_for_format; - if (isColumnOriented()) + if (supportsSamplingColumns()) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index cac5b3c270f..0b369e3a126 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -234,7 +234,7 @@ private: ContextPtr ctx, std::vector * read_keys_in_distributed_processing = nullptr); - bool isColumnOriented() const override; + bool supportsSamplingColumns() const override; }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 03bd1d5e7d9..71afe058cc7 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -582,9 +582,9 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( return readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context); } -bool IStorageURLBase::isColumnOriented() const +bool IStorageURLBase::supportsSamplingColumns() const { - return FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); + return FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); } Pipe IStorageURLBase::read( @@ -600,7 +600,7 @@ Pipe 
IStorageURLBase::read( ColumnsDescription columns_description; Block block_for_format; - if (isColumnOriented()) + if (supportsSamplingColumns()) { columns_description = ColumnsDescription{ storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; @@ -688,7 +688,7 @@ Pipe StorageURLWithFailover::read( { ColumnsDescription columns_description; Block block_for_format; - if (isColumnOriented()) + if (supportsSamplingColumns()) { columns_description = ColumnsDescription{ storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 25b88a827b6..44b3ba12e18 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -93,7 +93,7 @@ protected: QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; - bool isColumnOriented() const override; + bool supportsSamplingColumns() const override; private: virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 2c2f1ec3034..27263723a37 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -140,7 +140,7 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMet chooseCompressionMethod(uri, compression_method)); } -bool StorageXDBC::isColumnOriented() const +bool StorageXDBC::supportsSamplingColumns() const { return true; } diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 910ba162f86..0ce5c6957e7 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -67,7 +67,7 @@ private: Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const override; - bool isColumnOriented() const override; + bool supportsSamplingColumns() const override; }; } diff --git a/tests/performance/formats_columns_sampling.xml 
b/tests/performance/formats_columns_sampling.xml new file mode 100644 index 00000000000..7812823a175 --- /dev/null +++ b/tests/performance/formats_columns_sampling.xml @@ -0,0 +1,33 @@ + + + 1 + + + + + format + + TabSeparatedWithNames + TabSeparatedRawWithNames + CustomSeparatedWithNames + CSVWithNames + JSONEachRow + JSONCompactEachRowWithNames + TSKV + RowBinaryWithNamesAndTypes + Avro + ORC + Parquet + Arrow + + + + + CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}) AS test.hits + + INSERT INTO table_{format} SELECT * FROM test.hits LIMIT 100000 + + SELECT WatchID FROM table_{format} FORMAT Null + + DROP TABLE IF EXISTS table_{format} + From 9a0cc75f3ae6375f47381457b53a4d8d019a3ad9 Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 13 May 2022 14:13:14 +0000 Subject: [PATCH 129/615] update code style --- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 18639d21c78..8c708153a7a 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1054,7 +1054,7 @@ StorageWindowView::StorageWindowView( auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as(), context_); - if(is_proctime) + if (is_proctime) next_fire_signal = getWindowUpperBound(std::time(nullptr)); std::exchange(has_inner_table, true); From cef13c2c02a865bd808b86e8b1226559e41f65ec Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 13 May 2022 14:27:15 +0000 Subject: [PATCH 130/615] Allow to skip unknown columns in Native format --- src/Formats/NativeReader.cpp | 28 +++++++++++++------ src/Formats/NativeReader.h | 3 +- src/IO/ReadHelpers.cpp | 6 ++-- src/Processors/Formats/Impl/NativeFormat.cpp | 9 +++--- .../performance/formats_columns_sampling.xml | 1 + 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/Formats/NativeReader.cpp 
b/src/Formats/NativeReader.cpp index ed3aca43d52..3ad0ce5cfc4 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes extern const int INCORRECT_INDEX; extern const int LOGICAL_ERROR; extern const int CANNOT_READ_ALL_DATA; + extern const int INCORRECT_DATA; } @@ -31,8 +32,8 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_) { } -NativeReader::NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_) - : istr(istr_), header(header_), server_revision(server_revision_) +NativeReader::NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool skip_unknown_columns_) + : istr(istr_), header(header_), server_revision(server_revision_), skip_unknown_columns(skip_unknown_columns_) { } @@ -186,18 +187,29 @@ Block NativeReader::read() column.column = std::move(read_column); + bool use_in_result = true; if (header) { - /// Support insert from old clients without low cardinality type. - auto & header_column = header.getByName(column.name); - if (!header_column.type->equals(*column.type)) + if (header.has(column.name)) { - column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); - column.type = header.safeGetByPosition(i).type; + /// Support insert from old clients without low cardinality type. 
+ auto & header_column = header.getByName(column.name); + if (!header_column.type->equals(*column.type)) + { + column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); + column.type = header.safeGetByPosition(i).type; + } + } + else + { + if (!skip_unknown_columns) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column with name {} found while reading data in Native format", column.name); + use_in_result = false; } } - res.insert(std::move(column)); + if (use_in_result) + res.insert(std::move(column)); if (use_index) ++index_column_it; diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 1f9eb8b9764..3ae53d45faf 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -24,7 +24,7 @@ public: /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. - NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_); + NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool skip_unknown_columns_ = false); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. NativeReader(ReadBuffer & istr_, UInt64 server_revision_, @@ -43,6 +43,7 @@ private: ReadBuffer & istr; Block header; UInt64 server_revision; + bool skip_unknown_columns; bool use_index = false; IndexForNativeFormat::Blocks::const_iterator index_block_it; diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 71ed197da3e..f8226b137fb 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -703,9 +703,9 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & if constexpr (!std::is_same_v) { /** CSV format can contain insignificant spaces and tabs. - * Usually the task of skipping them is for the calling code. 
- * But in this case, it will be difficult to do this, so remove the trailing whitespace by ourself. - */ + * Usually the task of skipping them is for the calling code. + * But in this case, it will be difficult to do this, so remove the trailing whitespace by ourself. + */ size_t size = s.size(); while (size > 0 && (s[size - 1] == ' ' || s[size - 1] == '\t')) --size; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index c1dc60022f5..18c3dfa3d4b 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -15,9 +15,9 @@ namespace DB class NativeInputFormat final : public IInputFormat { public: - NativeInputFormat(ReadBuffer & buf, const Block & header_) + NativeInputFormat(ReadBuffer & buf, const Block & header_, const FormatSettings & settings) : IInputFormat(header_, buf) - , reader(std::make_unique(buf, header_, 0)) + , reader(std::make_unique(buf, header_, 0, settings.skip_unknown_fields)) , header(header_) {} String getName() const override { return "Native"; } @@ -112,10 +112,11 @@ void registerInputFormatNative(FormatFactory & factory) ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, - const FormatSettings &) + const FormatSettings & settings) { - return std::make_shared(buf, sample); + return std::make_shared(buf, sample, settings); }); + factory.markFormatSupportsSamplingColumns("Native"); } void registerOutputFormatNative(FormatFactory & factory) diff --git a/tests/performance/formats_columns_sampling.xml b/tests/performance/formats_columns_sampling.xml index 7812823a175..f5dd4395de1 100644 --- a/tests/performance/formats_columns_sampling.xml +++ b/tests/performance/formats_columns_sampling.xml @@ -19,6 +19,7 @@ ORC Parquet Arrow + Native From eba60ff38f220b6c2e9ab6142ebd4760df8e706c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 13 May 2022 17:00:47 +0200 Subject: [PATCH 131/615] Fix restorable schema --- 
.../registerDiskAzureBlobStorage.cpp | 2 +- src/Disks/DiskDecorator.cpp | 4 +- src/Disks/DiskDecorator.h | 2 +- src/Disks/DiskLocal.cpp | 4 +- src/Disks/DiskLocal.h | 2 +- src/Disks/DiskObjectStorage.cpp | 114 +++++++++++++----- src/Disks/DiskRestartProxy.cpp | 7 +- src/Disks/DiskRestartProxy.h | 2 +- src/Disks/IDisk.h | 2 +- .../IO/WriteIndirectBufferFromRemoteFS.cpp | 3 +- src/Disks/S3/registerDiskS3.cpp | 4 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- 12 files changed, 100 insertions(+), 48 deletions(-) diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 8b2429263bb..56df793783e 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -103,7 +103,7 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) checkRemoveAccess(*azure_blob_storage_disk); } - azure_blob_storage_disk->startup(); + azure_blob_storage_disk->startup(context); if (config.getBool(config_prefix + ".cache_enabled", true)) { diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 80cfc23d210..02babfbb59f 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -211,9 +211,9 @@ void DiskDecorator::shutdown() delegate->shutdown(); } -void DiskDecorator::startup() +void DiskDecorator::startup(ContextPtr context) { - delegate->startup(); + delegate->startup(context); } void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index d707eb3e51d..b86c520d5d8 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -71,7 +71,7 @@ public: void onFreeze(const String & path) override; SyncGuardPtr getDirectorySyncGuard(const String & path) const override; void shutdown() override; - void startup() 
override; + void startup(ContextPtr context) override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; String getCacheBasePath() const override { return delegate->getCacheBasePath(); } std::vector getRemotePaths(const String & path) const override { return delegate->getRemotePaths(path); } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index a55d588f2b5..e1e299a0d52 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -484,7 +484,7 @@ DiskLocal::DiskLocal( disk_checker = std::make_unique(this, context, local_disk_check_period_ms); } -void DiskLocal::startup() +void DiskLocal::startup(ContextPtr) { try { @@ -672,7 +672,7 @@ void registerDiskLocal(DiskFactory & factory) std::shared_ptr disk = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); - disk->startup(); + disk->startup(context); return std::make_shared(disk); }; factory.registerDiskType("local", creator); diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61faccbe2a5..101bf0e1f13 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -110,7 +110,7 @@ public: bool isBroken() const override { return broken; } - void startup() override; + void startup(ContextPtr) override; void shutdown() override; diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 04adebf1e82..8f472c713b7 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -32,6 +32,12 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } +static String revisionToString(UInt64 revision) +{ + return std::bitset<64>(revision).to_string(); +} + + DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) { Metadata result(remote_fs_root_path_, metadata_disk_, 
metadata_file_path_); @@ -340,16 +346,35 @@ size_t DiskObjectStorage::getFileSize(const String & path) const return readMetadata(path).total_size; } -void DiskObjectStorage::moveFile(const String & from_path, const String & to_path) +void DiskObjectStorage::moveFile(const String & from_path, const String & to_path, bool should_send_metadata) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "MOVE FILE"); if (exists(to_path)) throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); + if (should_send_metadata) + { + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter += 1; + + const ObjectAttributes object_metadata { + {"from_path", from_path}, + {"to_path", to_path} + }; + metadata_helper->createFileOperationObject("rename", revision, object_metadata); + } + metadata_disk->moveFile(from_path, to_path); } +void DiskObjectStorage::moveFile(const String & from_path, const String & to_path) +{ + moveFile(from_path, to_path, send_metadata); +} + void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "REPLACE FILE"); if (exists(to_path)) { const String tmp_path = to_path + ".old"; @@ -363,6 +388,7 @@ void DiskObjectStorage::replaceFile(const String & from_path, const String & to_ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Remove shared file"); std::vector paths_to_remove; removeMetadata(path, paths_to_remove); @@ -372,6 +398,7 @@ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metada void DiskObjectStorage::removeFromRemoteFS(const std::vector & paths) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Read from remote FS"); object_storage->removeObjects(paths); } @@ -416,17 +443,35 @@ bool DiskObjectStorage::checkUniqueId(const String & id) const return checkObjectExists(id); } -void DiskObjectStorage::createHardLink(const String & 
src_path, const String & dst_path) +void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "HARDLINK FILE"); readUpdateAndStoreMetadata(src_path, false, [](Metadata & metadata) { metadata.ref_count++; return true; }); + if (should_send_metadata && !dst_path.starts_with("shadow/")) + { + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter += 1; + const ObjectAttributes object_metadata { + {"src_path", src_path}, + {"dst_path", dst_path} + }; + metadata_helper->createFileOperationObject("hardlink", revision, object_metadata); + } + /// Create FS hardlink to metadata file. metadata_disk->createHardLink(src_path, dst_path); - } +void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path) +{ + createHardLink(src_path, dst_path, send_metadata); +} + + void DiskObjectStorage::setReadOnly(const String & path) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "set readonly"); /// We should store read only flag inside metadata file (instead of using FS flag), /// because we modify metadata file when create hard-links from it. readUpdateAndStoreMetadata(path, false, [](Metadata & metadata) { metadata.read_only = true; return true; }); @@ -560,15 +605,19 @@ void DiskObjectStorage::removeMetadataRecursive(const String & path, std::unorde void DiskObjectStorage::shutdown() { + LOG_INFO(log, "Shutting down disk {}", name); object_storage->shutdown(); + LOG_INFO(log, "Disk {} shut down", name); } -void DiskObjectStorage::startup() +void DiskObjectStorage::startup(ContextPtr context) { LOG_INFO(log, "Starting up disk {}", name); object_storage->startup(); + restoreMetadataIfNeeded(context->getConfigRef(), "storage_configuration.disks." 
+ name, context); + LOG_INFO(log, "Disk {} started up", name); } @@ -649,13 +698,24 @@ std::unique_ptr DiskObjectStorage::writeFile( { auto blob_name = getRandomASCIIString(); + std::optional object_attributes; + if (send_metadata) + { + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter++; + object_attributes = { + {"path", path} + }; + blob_name = "r" + revisionToString(revision) + "-file-" + blob_name; + } + auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, {}, create_metadata_callback, buf_size, settings); + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, object_attributes, create_metadata_callback, buf_size, settings); } @@ -725,10 +785,6 @@ DiskObjectStorageReservation::~DiskObjectStorageReservation() } } -static String revisionToString(UInt64 revision) -{ - return std::bitset<64>(revision).to_string(); -} void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const { @@ -877,8 +933,11 @@ void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { + LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name); + if (!disk->exists(RESTORE_FILE_NAME)) { + LOG_INFO(disk->log, "No restore file '{}' exists, finishing restore", RESTORE_FILE_NAME); return; } @@ -925,6 +984,7 @@ void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfigur if (disk->exists(root)) disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); + LOG_INFO(disk->log, 
"Old metadata removed, restoring new one"); restoreFiles(source_object_storage, information); restoreFileOperations(source_object_storage, information); @@ -1024,6 +1084,9 @@ void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_objec std::vector keys_names; for (const auto & [key, size] : keys) { + + LOG_INFO(disk->log, "Calling restore for key for disk {}", key); + /// Skip file operations objects. They will be processed separately. if (key.find("/operations/") != String::npos) continue; @@ -1051,6 +1114,7 @@ void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_objec BlobsPathToSize children; source_object_storage->listPrefix(restore_information.source_path, children); + restore_files(children); for (auto & result : results) @@ -1091,7 +1155,7 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * sourc auto relative_key = shrinkKey(source_path, key); /// Copy object if we restore to different bucket / path. - if (disk->remote_fs_root_path != source_path) + if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != source_path) source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) @@ -1107,6 +1171,14 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * sourc } +void DiskObjectStorage::onFreeze(const String & path) +{ + createDirectories(path); + auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); + writeIntText(metadata_helper->revision_counter.load(), *revision_file_buf); + revision_file_buf->finalize(); +} + static String pathToDetached(const String & source_path) { if (source_path.ends_with('/')) @@ -1150,16 +1222,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * sou auto to_path = 
object_attributes["to_path"]; if (disk->exists(from_path)) { - disk->moveFile(from_path, to_path); - if (send_metadata) - { - auto next_revision = ++revision_counter; - const ObjectAttributes object_metadata { - {"from_path", from_path}, - {"to_path", to_path} - }; - createFileOperationObject("rename", next_revision, object_attributes); - } + disk->moveFile(from_path, to_path, send_metadata); LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); @@ -1187,16 +1250,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * sou if (disk->exists(src_path)) { disk->createDirectories(directoryPath(dst_path)); - if (send_metadata && !dst_path.starts_with("shadow/")) - { - auto next_revision = ++revision_counter; - const ObjectAttributes object_metadata { - {"src_path", src_path}, - {"dst_path", dst_path} - }; - createFileOperationObject("hardlink", next_revision, object_attributes); - } - disk->createHardLink(src_path, dst_path); + disk->createHardLink(src_path, dst_path, send_metadata); LOG_TRACE(disk->log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); } } diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 8bb31cec55f..903caf705c5 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -6,8 +6,7 @@ namespace DB { namespace ErrorCodes -{ - extern const int DEADLOCK_AVOIDED; +{extern const int DEADLOCK_AVOIDED; } using Millis = std::chrono::milliseconds; @@ -329,7 +328,7 @@ void DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector< return DiskDecorator::getRemotePathsRecursive(path, paths_map); } -void DiskRestartProxy::restart() +void DiskRestartProxy::restart(ContextPtr context) { /// Speed up processing unhealthy requests. DiskDecorator::shutdown(); @@ -352,7 +351,7 @@ void DiskRestartProxy::restart() LOG_INFO(log, "Restart lock acquired. 
Restarting disk {}", DiskDecorator::getName()); - DiskDecorator::startup(); + DiskDecorator::startup(context); LOG_INFO(log, "Disk restarted {}", DiskDecorator::getName()); } diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index d30c2fdbbfb..084e06e3f18 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -68,7 +68,7 @@ public: std::vector getRemotePaths(const String & path) const override; void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; - void restart(); + void restart(ContextPtr context); private: friend class RestartAwareReadBuffer; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 1071e1294b6..cf8b1a09ce9 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -297,7 +297,7 @@ public: virtual void shutdown() {} /// Performs action on disk startup. - virtual void startup() {} + virtual void startup(ContextPtr) {} /// Return some uniq string for file, overrode for IDiskRemote /// Required for distinguish different copies of the same part on remote disk diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp index dca2fb17ba7..77da60ca07d 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp @@ -36,7 +36,8 @@ WriteIndirectBufferFromRemoteFS::~WriteIndirectBufferFromRemoteFS() void WriteIndirectBufferFromRemoteFS::finalizeImpl() { WriteBufferFromFileDecorator::finalizeImpl(); - create_metadata_callback(count()); + if (create_metadata_callback) + create_metadata_callback(count()); } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 54b736788fa..b344375f05b 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -96,9 +96,7 @@ void registerDiskS3(DiskFactory & factory) checkRemoveAccess(*s3disk); } - s3disk->startup(); - - s3disk->restoreMetadataIfNeeded(config, config_prefix, context); + 
s3disk->startup(context); std::shared_ptr disk_result = s3disk; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index b52645c7854..d49ab933f23 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -780,7 +780,7 @@ void InterpreterSystemQuery::restartDisk(String & name) auto disk = getContext()->getDisk(name); if (DiskRestartProxy * restart_proxy = dynamic_cast(disk.get())) - restart_proxy->restart(); + restart_proxy->restart(getContext()); else throw Exception("Disk " + name + " doesn't have possibility to restart", ErrorCodes::BAD_ARGUMENTS); } From febabb2787b01c0842edf57e8e8f92f12ced65a0 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 13 May 2022 15:08:02 +0000 Subject: [PATCH 132/615] Fix build --- src/IO/ReadHelpers.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index f8226b137fb..e0eab20d421 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -256,6 +256,7 @@ void readString(String & s, ReadBuffer & buf) template void readStringInto>(PaddedPODArray & s, ReadBuffer & buf); template void readStringInto(String & s, ReadBuffer & buf); +template void readStringInto(NullOutput & s, ReadBuffer & buf); template void readStringUntilEOFInto(Vector & s, ReadBuffer & buf) @@ -1429,6 +1430,8 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf) } } +template void readQuotedFieldInto(NullOutput & s, ReadBuffer & buf); + void readQuotedField(String & s, ReadBuffer & buf) { s.clear(); From b6f4a1b9f9ecc0cf6aef72b72e68ec185f7b69cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 13 May 2022 17:21:04 +0200 Subject: [PATCH 133/615] Missed change --- src/Disks/DiskObjectStorage.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 7e5d30dfea2..83a3ba6b508 100644 --- a/src/Disks/DiskObjectStorage.h +++ 
b/src/Disks/DiskObjectStorage.h @@ -88,6 +88,8 @@ public: void moveFile(const String & from_path, const String & to_path) override; + void moveFile(const String & from_path, const String & to_path, bool should_send_metadata); + void replaceFile(const String & from_path, const String & to_path) override; void removeFile(const String & path) override { removeSharedFile(path, false); } @@ -119,6 +121,7 @@ public: bool checkUniqueId(const String & id) const override; void createHardLink(const String & src_path, const String & dst_path) override; + void createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata); void listFiles(const String & path, std::vector & file_names) override; @@ -146,7 +149,7 @@ public: void shutdown() override; - void startup() override; + void startup(ContextPtr context) override; ReservationPtr reserve(UInt64 bytes) override; @@ -165,6 +168,8 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; void restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); + + void onFreeze(const String & path) override; private: const String name; const String remote_fs_root_path; From 4549d2f40ffae2df3d9f6f6fc924acdb98e1e45e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 13 May 2022 17:41:26 +0200 Subject: [PATCH 134/615] Fix build --- src/IO/ReadHelpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ebf6e98d9a7..146aef9a58d 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -618,6 +618,7 @@ void readStringUntilNewlineInto(Vector & s, ReadBuffer & buf); struct NullOutput { void append(const char *, size_t) {} + void append(const char *) {} void push_back(char) {} /// NOLINT }; From 3bdddea3cf2286a7d59188f47a3941ff43c546ab Mon Sep 17 00:00:00 
2001 From: Vxider Date: Fri, 13 May 2022 17:45:28 +0000 Subject: [PATCH 135/615] update test --- ...065_window_view_event_hop_watch_bounded.py | 37 ++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 7cee8081352..2a2dc508dd3 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys -import signal CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) @@ -25,31 +26,31 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect("Ok.") - client1.send("DROP TABLE IF EXISTS test.wv") - client1.expect("Ok.") - client1.send("DROP TABLE IF EXISTS `.inner.wv`") - client1.expect("Ok.") + client1.send("CREATE DATABASE 01065_window_view_event_hop_watch_bounded") + client1.expect(prompt) + client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.mt") + client1.expect(prompt) + client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.wv") + client1.expect(prompt) client1.send( - "CREATE TABLE test.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE 01065_window_view_event_hop_watch_bounded.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) - client1.expect("Ok.") + client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW test.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM test.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 
01065_window_view_event_hop_watch_bounded.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01065_window_view_event_hop_watch_bounded.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect("Ok.") - client1.send("WATCH test.wv") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:00');") + client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');") client2.expect("Ok.") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:05');") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');") client2.expect("Ok.") client1.expect("1" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:06');") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');") client2.expect("Ok.") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:10');") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');") client2.expect("Ok.") client1.expect("2" + end_of_block) @@ -59,7 +60,9 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE test.wv") + client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.wv") client1.expect(prompt) - client1.send("DROP TABLE test.mt") + client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.mt") client1.expect(prompt) + client1.send("DROP DATABASE IF EXISTS 01065_window_view_event_hop_watch_bounded") + client1.expect(prompt) \ No newline at end of file From 68bb07d16638d76c8778bf6c51fef797e786fba9 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 13 May 2022 18:39:19 +0000 Subject: [PATCH 136/615] Better naming --- 
src/Formats/FormatFactory.cpp | 8 ++++---- src/Formats/FormatFactory.h | 6 +++--- src/Formats/registerWithNamesAndTypes.cpp | 4 ++-- src/IO/ReadHelpers.h | 1 + src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp | 2 +- src/Processors/Formats/Impl/BinaryRowInputFormat.cpp | 2 +- .../Formats/Impl/CapnProtoRowInputFormat.cpp | 2 +- .../Formats/Impl/JSONEachRowRowInputFormat.cpp | 8 ++++---- src/Processors/Formats/Impl/NativeFormat.cpp | 2 +- src/Processors/Formats/Impl/ORCBlockInputFormat.cpp | 2 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- .../Formats/Impl/ProtobufListInputFormat.cpp | 2 +- src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp | 2 +- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 6 +++--- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/Hive/StorageHive.cpp | 6 +++--- src/Storages/Hive/StorageHive.h | 2 +- src/Storages/IStorage.h | 2 +- src/Storages/StorageFile.cpp | 10 +++++----- src/Storages/StorageFile.h | 2 +- src/Storages/StorageS3.cpp | 6 +++--- src/Storages/StorageS3.h | 2 +- src/Storages/StorageURL.cpp | 8 ++++---- src/Storages/StorageURL.h | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/StorageXDBC.h | 2 +- 27 files changed, 49 insertions(+), 48 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a05ba2e1f38..961bed1f141 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -538,19 +538,19 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na } -void FormatFactory::markFormatSupportsSamplingColumns(const String & name) +void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) { - auto & target = dict[name].supports_sampling_columns; + auto & target = dict[name].supports_subset_of_columns_columns; if (target) throw Exception("FormatFactory: Format " + name + " is already marked as column oriented", ErrorCodes::LOGICAL_ERROR); target = true; } -bool 
FormatFactory::checkIfFormatSupportsSamplingColumns(const String & name) +bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) { const auto & target = getCreators(name); - return target.supports_sampling_columns; + return target.supports_subset_of_columns_columns; } bool FormatFactory::isInputFormat(const String & name) const diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 5d8c2e133f2..006d5d05099 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -108,7 +108,7 @@ private: SchemaReaderCreator schema_reader_creator; ExternalSchemaReaderCreator external_schema_reader_creator; bool supports_parallel_formatting{false}; - bool supports_sampling_columns{false}; + bool supports_subset_of_columns_columns{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; AppendSupportChecker append_support_checker; }; @@ -194,9 +194,9 @@ public: void registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator); void markOutputFormatSupportsParallelFormatting(const String & name); - void markFormatSupportsSamplingColumns(const String & name); + void markFormatSupportsSubsetOfColumns(const String & name); - bool checkIfFormatSupportsSamplingColumns(const String & name); + bool checkIfFormatSupportsSubsetOfColumns(const String & name); bool checkIfFormatHasSchemaReader(const String & name); bool checkIfFormatHasExternalSchemaReader(const String & name); diff --git a/src/Formats/registerWithNamesAndTypes.cpp b/src/Formats/registerWithNamesAndTypes.cpp index 81a711e736b..2dee107844d 100644 --- a/src/Formats/registerWithNamesAndTypes.cpp +++ b/src/Formats/registerWithNamesAndTypes.cpp @@ -12,8 +12,8 @@ void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWit void markFormatWithNamesAndTypesSupportsSamplingColumns(const std::string & base_format_name, FormatFactory & factory) { - 
factory.markFormatSupportsSamplingColumns(base_format_name + "WithNames"); - factory.markFormatSupportsSamplingColumns(base_format_name + "WithNamesAndTypes"); + factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNames"); + factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNamesAndTypes"); } } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 146aef9a58d..32ec2fabe01 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -619,6 +619,7 @@ struct NullOutput { void append(const char *, size_t) {} void append(const char *) {} + void append(const char *, const char *) {} void push_back(char) {} /// NOLINT }; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 3d9a536b565..4c99db0693f 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -188,7 +188,7 @@ void registerInputFormatArrow(FormatFactory & factory) { return std::make_shared(buf, sample, false, format_settings); }); - factory.markFormatSupportsSamplingColumns("Arrow"); + factory.markFormatSupportsSubsetOfColumns("Arrow"); factory.registerInputFormat( "ArrowStream", [](ReadBuffer & buf, diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index 080c3173b17..ced8bd63c6f 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -114,7 +114,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) }; registerWithNamesAndTypes("RowBinary", register_func); - factory.markFormatSupportsSamplingColumns("RowBinaryWithNamesAndTypes"); + factory.markFormatSupportsSubsetOfColumns("RowBinaryWithNamesAndTypes"); factory.registerFileExtension("bin", "RowBinary"); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp 
b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 61394ef6d22..ad173e449d6 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -310,7 +310,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), FormatSchemaInfo(settings, "CapnProto", true), settings); }); - factory.markFormatSupportsSamplingColumns("CapnProto"); + factory.markFormatSupportsSubsetOfColumns("CapnProto"); factory.registerFileExtension("capnp", "CapnProto"); } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index bc820c61fca..0712284af55 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -394,10 +394,10 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), settings, true); }); - factory.markFormatSupportsSamplingColumns("JSONEachRow"); - factory.markFormatSupportsSamplingColumns("JSONLines"); - factory.markFormatSupportsSamplingColumns("NDJSON"); - factory.markFormatSupportsSamplingColumns("JSONStringsEachRow"); + factory.markFormatSupportsSubsetOfColumns("JSONEachRow"); + factory.markFormatSupportsSubsetOfColumns("JSONLines"); + factory.markFormatSupportsSubsetOfColumns("NDJSON"); + factory.markFormatSupportsSubsetOfColumns("JSONStringsEachRow"); } void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 18c3dfa3d4b..423fd483712 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -116,7 +116,7 @@ void registerInputFormatNative(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - 
factory.markFormatSupportsSamplingColumns("Native"); + factory.markFormatSupportsSubsetOfColumns("Native"); } void registerOutputFormatNative(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 0614eb3fc30..66fb000f0cc 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -200,7 +200,7 @@ void registerInputFormatORC(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSamplingColumns("ORC"); + factory.markFormatSupportsSubsetOfColumns("ORC"); } void registerORCSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index a49a7a4e0ca..6add74e2865 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -193,7 +193,7 @@ void registerInputFormatParquet(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSamplingColumns("Parquet"); + factory.markFormatSupportsSubsetOfColumns("Parquet"); } void registerParquetSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 91a8c10643e..4599734591f 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -79,7 +79,7 @@ void registerInputFormatProtobufList(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), FormatSchemaInfo(settings, "Protobuf", true), settings.protobuf.input_flatten_google_wrappers); }); - factory.markFormatSupportsSamplingColumns("ProtobufList"); + factory.markFormatSupportsSubsetOfColumns("ProtobufList"); } void 
registerProtobufListSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index ed6222af4e1..0376bf2c292 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -69,7 +69,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) with_length_delimiter, settings.protobuf.input_flatten_google_wrappers); }); - factory.markFormatSupportsSamplingColumns(with_length_delimiter ? "Protobuf" : "ProtobufSingle"); + factory.markFormatSupportsSubsetOfColumns(with_length_delimiter ? "Protobuf" : "ProtobufSingle"); } } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 113f18f73b6..e57b744c1f8 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -281,7 +281,7 @@ void registerInputFormatTSKV(FormatFactory & factory) return std::make_shared(buf, sample, std::move(params), settings); }); - factory.markFormatSupportsSamplingColumns("TSKV"); + factory.markFormatSupportsSubsetOfColumns("TSKV"); } void registerTSKVSchemaReader(FormatFactory & factory) { diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 5d17bcb567e..abd0a762bfa 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -476,9 +476,9 @@ private: }; -bool StorageHDFS::supportsSamplingColumns() const +bool StorageHDFS::supportsSubsetOfColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } Pipe StorageHDFS::read( @@ -527,7 +527,7 @@ Pipe StorageHDFS::read( ColumnsDescription columns_description; Block block_for_format; - if 
(supportsSamplingColumns()) + if (supportsSubsetOfColumns()) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 28c23c366d4..c8ebbfcfaac 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -57,7 +57,7 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. - bool supportsSamplingColumns() const override; + bool supportsSubsetOfColumns() const override; static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 63c3ce03945..831fd54230d 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -622,14 +622,14 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( return hive_file; } -bool StorageHive::supportsSamplingColumns() const +bool StorageHive::supportsSubsetOfColumns() const { return format_name == "Parquet" || format_name == "ORC"; } void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const { - if (!supportsSamplingColumns()) + if (!supportsSubsetOfColumns()) sample_block = header_block; UInt32 erased_columns = 0; for (const auto & column : partition_columns) @@ -795,7 +795,7 @@ std::optional StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const { /// Row-based format like Text doesn't support totalRowsByPartitionPredicate - if (!supportsSamplingColumns()) + if (!supportsSubsetOfColumns()) return {}; auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url); diff --git 
a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index fdd74ecd7a2..245f1768f3f 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -63,7 +63,7 @@ public: NamesAndTypesList getVirtuals() const override; - bool supportsSamplingColumns() const override; + bool supportsSubsetOfColumns() const override; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 131aed1bef0..944dac3252d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -585,7 +585,7 @@ public: /// Returns true if all disks of storage are read-only. virtual bool isStaticStorage() const; - virtual bool supportsSamplingColumns() const { return false; } + virtual bool supportsSubsetOfColumns() const { return false; } /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. 
/// Used for: diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5899959c753..4d89278f91c 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -316,9 +316,9 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); } -bool StorageFile::supportsSamplingColumns() const +bool StorageFile::supportsSubsetOfColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } StorageFile::StorageFile(int table_fd_, CommonArguments args) @@ -465,7 +465,7 @@ public: const ColumnsDescription & columns_description, const FilesInfoPtr & files_info) { - if (storage->supportsSamplingColumns()) + if (storage->supportsSubsetOfColumns()) return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); else return getHeader(storage_snapshot->metadata, files_info->need_path_column, files_info->need_file_column); @@ -530,7 +530,7 @@ public: auto get_block_for_format = [&]() -> Block { - if (storage->supportsSamplingColumns()) + if (storage->supportsSubsetOfColumns()) return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); return storage_snapshot->metadata->getSampleBlock(); }; @@ -690,7 +690,7 @@ Pipe StorageFile::read( { const auto get_columns_for_format = [&]() -> ColumnsDescription { - if (supportsSamplingColumns()) + if (supportsSubsetOfColumns()) return ColumnsDescription{ storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; else diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 803e0466119..66b71476653 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -73,7 +73,7 @@ public: /// Is is useful 
because such formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. - bool supportsSamplingColumns() const override; + bool supportsSubsetOfColumns() const override; bool supportsPartitionBy() const override { return true; } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 79bbd5f7254..67dd93e3138 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -676,9 +676,9 @@ std::shared_ptr StorageS3::createFileIterator( } } -bool StorageS3::supportsSamplingColumns() const +bool StorageS3::supportsSubsetOfColumns() const { - return FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } Pipe StorageS3::read( @@ -707,7 +707,7 @@ Pipe StorageS3::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSamplingColumns()) + if (supportsSubsetOfColumns()) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 0b369e3a126..102f74b83cd 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -234,7 +234,7 @@ private: ContextPtr ctx, std::vector * read_keys_in_distributed_processing = nullptr); - bool supportsSamplingColumns() const override; + bool supportsSubsetOfColumns() const override; }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 71afe058cc7..a40c26043c1 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -582,9 +582,9 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( return readSchemaFromFormat(format, format_settings, read_buffer_iterator, urls_to_check.size() > 1, context); } -bool IStorageURLBase::supportsSamplingColumns() const +bool 
IStorageURLBase::supportsSubsetOfColumns() const { - return FormatFactory::instance().checkIfFormatSupportsSamplingColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } Pipe IStorageURLBase::read( @@ -600,7 +600,7 @@ Pipe IStorageURLBase::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSamplingColumns()) + if (supportsSubsetOfColumns()) { columns_description = ColumnsDescription{ storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; @@ -688,7 +688,7 @@ Pipe StorageURLWithFailover::read( { ColumnsDescription columns_description; Block block_for_format; - if (supportsSamplingColumns()) + if (supportsSubsetOfColumns()) { columns_description = ColumnsDescription{ storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 44b3ba12e18..85c77b00550 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -93,7 +93,7 @@ protected: QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; - bool supportsSamplingColumns() const override; + bool supportsSubsetOfColumns() const override; private: virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 27263723a37..f44daf2557e 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -140,7 +140,7 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMet chooseCompressionMethod(uri, compression_method)); } -bool StorageXDBC::supportsSamplingColumns() const +bool StorageXDBC::supportsSubsetOfColumns() const { return true; } diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 0ce5c6957e7..442db5277e0 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -67,7 
+67,7 @@ private: Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const override; - bool supportsSamplingColumns() const override; + bool supportsSubsetOfColumns() const override; }; } From d1a9ebaaaf959e98cbd65c22d4cb1a9755e1ded6 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 03:59:23 +0000 Subject: [PATCH 137/615] update code style --- ...1065_window_view_event_hop_watch_bounded.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 2a2dc508dd3..cd2c2d06a16 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -43,14 +43,22 @@ with client(name="client1>", log=log) as client1, client( client1.expect("Ok.") client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');" + ) client2.expect("Ok.") - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');" + ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');" + ) client2.expect("Ok.") - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');") + client2.send( + "INSERT INTO 
01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');" + ) client2.expect("Ok.") client1.expect("2" + end_of_block) @@ -65,4 +73,4 @@ with client(name="client1>", log=log) as client1, client( client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.mt") client1.expect(prompt) client1.send("DROP DATABASE IF EXISTS 01065_window_view_event_hop_watch_bounded") - client1.expect(prompt) \ No newline at end of file + client1.expect(prompt) From b52cac970d19cc892daf5a9f0007da202604b766 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 06:35:50 +0000 Subject: [PATCH 138/615] fix alter on replicated engine --- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 88c31a8fd58..8dbaaa295aa 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -479,7 +479,7 @@ void StorageWindowView::alter( auto inner_query = initInnerQuery(new_select_query->as(), local_context); - dropInnerTableIfAny(true, getContext()); + dropInnerTableIfAny(true, local_context); /// create inner table std::exchange(has_inner_table, true); From 46f1a59f3d4e53d47b4fe7b2e2134e11b41f2dee Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 06:36:08 +0000 Subject: [PATCH 139/615] update test --- .../0_stateless/01078_window_view_alter_query_watch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 8d1e7a6f667..29b71c3b949 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -52,12 +52,13 @@ with client(name="client1>", log=log) as client1, client( "INSERT INTO 
01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:06');" ) client2.expect("Ok.") - client1.expect("1*" + end_of_block) + client1.expect("1" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") client2.send( "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, tumbleEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" ) client2.expect("Ok.") + client2.expect(prompt) client1.expect(prompt) client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) @@ -69,7 +70,7 @@ with client(name="client1>", log=log) as client1, client( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:18');" ) client2.expect("Ok.") - client1.expect("2*" + end_of_block) + client1.expect("2" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") # send Ctrl-C From f2ef27e5d64884a13ad4bc85e6ef0a9b6a47c95f Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 09:21:54 +0000 Subject: [PATCH 140/615] rename windowview header --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 35 +++++++++---------- src/Storages/WindowView/StorageWindowView.h | 10 +++--- src/Storages/WindowView/WindowViewSource.h | 4 +-- 5 files changed, 25 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index b36940de501..4c0e9420280 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -104,7 +104,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (!query.columns) { if (auto * window_view = dynamic_cast(table.get())) - return window_view->getHeader(); + return window_view->getInputHeader(); else if (no_destination) return 
metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); else diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7d567143a58..a3cc620e418 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -392,7 +392,7 @@ Chain buildPushingToViewsChain( } else if (auto * window_view = dynamic_cast(storage.get())) { - auto sink = std::make_shared(window_view->getHeader(), *window_view, storage, context); + auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0e55a090d2d..cab07b12858 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -626,7 +626,7 @@ ASTPtr StorageWindowView::getSourceTableSelectQuery() } auto select_list = std::make_shared(); - for (const auto & column_name : source_header.getNames()) + for (const auto & column_name : getInputHeader().getNames()) select_list->children.emplace_back(std::make_shared(column_name)); modified_select.setExpression(ASTSelectQuery::Expression::SELECT, select_list); @@ -1108,9 +1108,6 @@ StorageWindowView::StorageWindowView( select_query = query.select->list_of_selects->children.at(0)->clone(); - source_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)) - .getSampleBlock(); - String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; auto select_query_tmp = select_query->clone(); @@ -1521,26 +1518,26 @@ void StorageWindowView::dropInnerTableIfAny(bool no_delay, ContextPtr local_cont } } -Block StorageWindowView::getHeader() const -{ - return source_header; 
-} - -Block StorageWindowView::getTargetHeader() const +const Block & StorageWindowView::getInputHeader() const { std::lock_guard lock(sample_block_lock); - if (!target_header) + if (!input_header) { - target_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::Complete)) + input_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)) .getSampleBlock(); - /// convert all columns to full columns - /// in case some of them are constant - for (size_t i = 0; i < target_header.columns(); ++i) - { - target_header.safeGetByPosition(i).column = target_header.safeGetByPosition(i).column->convertToFullColumnIfConst(); - } } - return target_header; + return input_header; +} + +const Block & StorageWindowView::getOutputHeader() const +{ + std::lock_guard lock(sample_block_lock); + if (!output_header) + { + output_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::Complete)) + .getSampleBlock(); + } + return output_header; } StoragePtr StorageWindowView::getSourceTable() const diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 1df4035595a..fb63bcb3cec 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -174,7 +174,9 @@ public: ASTPtr getSourceTableSelectQuery(); - Block getHeader() const; + const Block & getInputHeader() const; + + const Block & getOutputHeader() const; private: Poco::Logger * log; @@ -191,8 +193,8 @@ private: bool is_tumble; // false if is hop std::atomic shutdown_called{false}; bool has_inner_table{true}; - mutable Block source_header; - mutable Block target_header; + mutable Block input_header; + mutable Block output_header; UInt64 clean_interval_ms; const DateLUTImpl * time_zone = nullptr; UInt32 max_timestamp = 0; @@ -265,7 +267,5 @@ private: StoragePtr 
getSourceTable() const; StoragePtr getInnerTable() const; StoragePtr getTargetTable() const; - - Block getTargetHeader() const; }; } diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index ae5eecfac3c..234784bf1d3 100644 --- a/src/Storages/WindowView/WindowViewSource.h +++ b/src/Storages/WindowView/WindowViewSource.h @@ -20,7 +20,7 @@ public: : SourceWithProgress( is_events_ ? Block( {ColumnWithTypeAndName(ColumnUInt32::create(), std::make_shared(window_view_timezone_), "watermark")}) - : storage_->getTargetHeader()) + : storage_->getOutputHeader()) , storage(storage_) , is_events(is_events_) , window_view_timezone(window_view_timezone_) @@ -32,7 +32,7 @@ public: header.insert( ColumnWithTypeAndName(ColumnUInt32::create(), std::make_shared(window_view_timezone_), "watermark")); else - header = storage->getTargetHeader(); + header = storage->getOutputHeader(); } String getName() const override { return "WindowViewSource"; } From f8a68fff5b7292525d57eac9eada998b414a1c11 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 09:39:55 +0000 Subject: [PATCH 141/615] update test --- .../01078_window_view_alter_query_watch.py | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 29b71c3b949..5f64a8783e1 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -13,11 +13,10 @@ log = None # uncomment the line below for debugging # log=sys.stdout -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: +with client(name="client1>", log=log) as client1, client(name="client2>", log=log) as client2, client(name="client3>", log=log) as client3: client1.expect(prompt) client2.expect(prompt) + client3.expect(prompt) 
client1.send("SET allow_experimental_window_view = 1") client1.expect(prompt) @@ -25,6 +24,8 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client3.send("SET allow_experimental_window_view = 1") + client3.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01078_window_view_alter_query_watch") client1.expect(prompt) @@ -58,10 +59,8 @@ with client(name="client1>", log=log) as client1, client( "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, tumbleEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" ) client2.expect("Ok.") - client2.expect(prompt) - client1.expect(prompt) - client1.send("WATCH 01078_window_view_alter_query_watch.wv") - client1.expect("Query id" + end_of_block) + client3.send("WATCH 01078_window_view_alter_query_watch.wv") + client3.expect("Query id" + end_of_block) client2.send( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:11');" ) @@ -70,18 +69,18 @@ with client(name="client1>", log=log) as client1, client( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:18');" ) client2.expect("Ok.") - client1.expect("2" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client3.expect("2" + end_of_block) + client3.expect("Progress: 1.00 rows.*\)") # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + client3.send("\x03", eol="") + match = client3.expect("(%s)|([#\$] )" % prompt) if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE 01078_window_view_alter_query_watch.wv NO DELAY;") - client1.expect(prompt) - client1.send("DROP TABLE 01078_window_view_alter_query_watch.mt;") - client1.expect(prompt) - client1.send("DROP DATABASE IF EXISTS 
01078_window_view_alter_query_watch;") - client1.expect(prompt) + client3.send(client3.command) + client3.expect(prompt) + client3.send("DROP TABLE 01078_window_view_alter_query_watch.wv NO DELAY;") + client3.expect(prompt) + client3.send("DROP TABLE 01078_window_view_alter_query_watch.mt;") + client3.expect(prompt) + client3.send("DROP DATABASE IF EXISTS 01078_window_view_alter_query_watch;") + client3.expect(prompt) From 7802c0d7d9e4a86c3af04d5ddfc4bcdddbda3050 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 11:36:22 +0000 Subject: [PATCH 142/615] update test --- .../0_stateless/01078_window_view_alter_query_watch.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 5f64a8783e1..bfd71146e1f 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -13,7 +13,9 @@ log = None # uncomment the line below for debugging # log=sys.stdout -with client(name="client1>", log=log) as client1, client(name="client2>", log=log) as client2, client(name="client3>", log=log) as client3: +with client(name="client1>", log=log) as client1, client( + name="client2>", log=log +) as client2, client(name="client3>", log=log) as client3: client1.expect(prompt) client2.expect(prompt) client3.expect(prompt) @@ -59,6 +61,9 @@ with client(name="client1>", log=log) as client1, client(name="client2>", log=lo "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, tumbleEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" ) client2.expect("Ok.") + client2.expect(prompt) + client1.expect("1 row" + end_of_block) + client1.expect(prompt) client3.send("WATCH 01078_window_view_alter_query_watch.wv") client3.expect("Query id" 
+ end_of_block) client2.send( From 4540cf8925f3327978fa35416b6cdc26c459485f Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 13:46:47 +0200 Subject: [PATCH 143/615] Style check fixes --- src/Disks/DiskObjectStorage.cpp | 3 +- src/Disks/IDisk.h | 2 +- src/Disks/S3/diskSettings.cpp | 5 ++ src/Disks/S3ObjectStorage.cpp | 54 +++++++++----------- src/Disks/S3ObjectStorage.h | 2 +- tests/integration/test_merge_tree_s3/test.py | 4 +- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 8f472c713b7..01f01fdeaa4 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -29,7 +29,8 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; extern const int MEMORY_LIMIT_EXCEEDED; - extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } static String revisionToString(UInt64 revision) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index cf8b1a09ce9..e4a0b84448c 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -290,7 +290,7 @@ public: virtual bool isReadOnly() const { return false; } - /// Check if disk is broken. Broken disks will have 0 space and connot be used. + /// Check if disk is broken. Broken disks will have 0 space and cannot be used. virtual bool isBroken() const { return false; } /// Invoked when Global Context is shutdown. 
diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp index c4cd3253a21..579f160abd4 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/S3/diskSettings.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { S3Settings::ReadWriteSettings rw_settings; diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 0a7bd45d546..e6c3d357265 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -35,10 +35,6 @@ namespace DB namespace ErrorCodes { extern const int S3_ERROR; - extern const int FILE_ALREADY_EXISTS; - extern const int UNKNOWN_FORMAT; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; } namespace @@ -82,14 +78,10 @@ bool S3ObjectStorage::exists(const std::string & path) const if (!object_head.IsSuccess()) { if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT DOESNT {} EXISTS", path); return false; - } throwIfError(object_head); } - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "OBJECT {} EXISTS", path); return true; } @@ -102,31 +94,31 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT std::optional) const { - ReadSettings disk_read_settings{read_settings}; - if (cache) - { - if (IFileCache::isReadOnly()) - disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + ReadSettings disk_read_settings{read_settings}; + if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; - disk_read_settings.remote_fs_cache = cache; - } + disk_read_settings.remote_fs_cache = cache; + } - auto settings_ptr = s3_settings.get(); + auto settings_ptr = s3_settings.get(); - auto s3_impl = std::make_unique( - client.get(), bucket, 
version_id, common_path_prefix, blobs_to_read, - settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); + auto s3_impl = std::make_unique( + client.get(), bucket, version_id, common_path_prefix, blobs_to_read, + settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); - if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - auto reader = getThreadPoolReader(); - return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); - } - else - { - auto buf = std::make_unique(std::move(s3_impl)); - return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); - } + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + { + auto reader = getThreadPoolReader(); + return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); + } + else + { + auto buf = std::make_unique(std::move(s3_impl)); + return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); + } } std::unique_ptr S3ObjectStorage::readObject( /// NOLINT @@ -135,8 +127,8 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::optional, std::optional) const { - auto settings_ptr = s3_settings.get(); - return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); + auto settings_ptr = s3_settings.get(); + return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); } diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h index 7632a643130..81595d4385d 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -56,7 +56,7 @@ public: , s3_settings(std::move(s3_settings_)) , version_id(std::move(version_id_)) {} - + bool exists(const std::string & path) const override; std::unique_ptr readObject( /// NOLINT diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 
c7041c05403..38553f27ac1 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -229,7 +229,9 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): assert ( node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) - wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45) + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45 + ) @pytest.mark.parametrize("node_name", ["node"]) From 47834cbabea440880b87d787b5d21688ae1372d6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 14:06:09 +0200 Subject: [PATCH 144/615] Satisfy clang-tidy and style check --- src/Disks/DiskObjectStorage.cpp | 2 +- src/Disks/IObjectStorage.h | 11 +++++++++-- src/Disks/S3ObjectStorage.cpp | 2 +- src/Disks/S3ObjectStorage.h | 11 +++++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 01f01fdeaa4..d2384a82d13 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -1003,7 +1003,7 @@ void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfigur } } -void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) +void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) /// NOLINT { auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); buffer->next(); diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h index 6a66ffb622e..64ba6e75281 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/IObjectStorage.h @@ -95,9 +95,16 @@ public: virtual void removeObjectsIfExist(const std::vector & paths) = 0; - virtual void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + 
virtual void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) = 0; - virtual void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}); + virtual void copyObjectToAnotherObjectStorage( /// NOLINT + const std::string & object_from, + const std::string & object_to, + IObjectStorage & object_storage_to, + std::optional object_to_attributes = {}); virtual ~IObjectStorage() = default; diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index e6c3d357265..94a9a42807a 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -132,7 +132,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT } -std::unique_ptr S3ObjectStorage::writeObject( +std::unique_ptr S3ObjectStorage::writeObject( /// NOLINT const std::string & path, std::optional attributes, FinalizeCallback && finalize_callback, diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h index 81595d4385d..fcc99ae1d91 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -92,9 +92,16 @@ public: ObjectMetadata getObjectMetadata(const std::string & path) const override; - void copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) override; + void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) override; - void copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes = {}) override; + void copyObjectToAnotherObjectStorage( /// NOLINT + const std::string & object_from, + const std::string & object_to, + IObjectStorage & object_storage_to, + std::optional object_to_attributes = 
{}) override; void shutdown() override; From 32e0ca22be26623cf1c698c1c0b9152b0a2454be Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 14:49:01 +0200 Subject: [PATCH 145/615] Format queries better --- ...emote_filesystem_cache_on_insert.reference | 48 +++++++++++++++-- ...2241_remote_filesystem_cache_on_insert.sql | 51 +++++++++++++++++-- 2 files changed, 93 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index b2269c16264..783227d5587 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -4,13 +4,41 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, 
local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 SELECT count() FROM system.filesystem_cache; 0 INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 @@ -33,7 +61,21 @@ SELECT count() size FROM system.filesystem_cache; 7 SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) 
AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index c3ab1de3693..31d4ca99abb 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -9,13 +9,43 @@ CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SET SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT 
file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; @@ -33,7 +63,22 @@ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state +FROM +( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + 
system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path +) +WHERE endsWith(local_path, 'data.bin') +FORMAT Vertical; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; From 0e0b535b257e328a983792e0735fae4908f7b82d Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 16:54:39 +0200 Subject: [PATCH 146/615] Fix failing test --- src/Disks/DiskObjectStorage.cpp | 13 ------------- src/Disks/DiskObjectStorage.h | 5 +++++ src/Disks/S3/registerDiskS3.cpp | 10 +++++++++- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index d2384a82d13..2f0dad9e8b8 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -349,7 +349,6 @@ size_t DiskObjectStorage::getFileSize(const String & path) const void DiskObjectStorage::moveFile(const String & from_path, const String & to_path, bool should_send_metadata) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "MOVE FILE"); if (exists(to_path)) throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); @@ -375,7 +374,6 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "REPLACE FILE"); if (exists(to_path)) { const String tmp_path = to_path + ".old"; @@ -389,7 +387,6 @@ void DiskObjectStorage::replaceFile(const String & from_path, const String & to_ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Remove shared file"); std::vector paths_to_remove; removeMetadata(path, paths_to_remove); @@ -399,7 +396,6 @@ void 
DiskObjectStorage::removeSharedFile(const String & path, bool delete_metada void DiskObjectStorage::removeFromRemoteFS(const std::vector & paths) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "Read from remote FS"); object_storage->removeObjects(paths); } @@ -446,7 +442,6 @@ bool DiskObjectStorage::checkUniqueId(const String & id) const void DiskObjectStorage::createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "HARDLINK FILE"); readUpdateAndStoreMetadata(src_path, false, [](Metadata & metadata) { metadata.ref_count++; return true; }); if (should_send_metadata && !dst_path.starts_with("shadow/")) @@ -472,7 +467,6 @@ void DiskObjectStorage::createHardLink(const String & src_path, const String & d void DiskObjectStorage::setReadOnly(const String & path) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "set readonly"); /// We should store read only flag inside metadata file (instead of using FS flag), /// because we modify metadata file when create hard-links from it. 
readUpdateAndStoreMetadata(path, false, [](Metadata & metadata) { metadata.read_only = true; return true; }); @@ -729,19 +723,12 @@ void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfig { if (send_metadata) { - LOG_DEBUG(log, "START RESTORING METADATA"); metadata_helper->restore(config, config_prefix, context); if (metadata_helper->readSchemaVersion(object_storage.get(), remote_fs_root_path) < DiskObjectStorageMetadataHelper::RESTORABLE_SCHEMA_VERSION) - { - LOG_DEBUG(log, "DONE READING"); metadata_helper->migrateToRestorableSchema(); - LOG_DEBUG(log, "MIGRATION FINISHED"); - } - LOG_DEBUG(log, "SEARCHING LAST REVISION"); metadata_helper->findLastRevision(); - LOG_DEBUG(log, "DONE RESTORING METADATA"); } } diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 83a3ba6b508..d04bb3d0d04 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -56,6 +56,11 @@ public: void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; + std::string getCacheBasePath() const override + { + return object_storage->getCacheBasePath(); + } + /// Methods for working with metadata. For some operations (like hardlink /// creation) metadata can be updated concurrently from multiple threads /// (file actually rewritten on disk). So additional RW lock is required for diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index b344375f05b..de1194d6daf 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -37,7 +37,15 @@ namespace void checkWriteAccess(IDisk & disk) { auto file = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); - file->write("test", 4); + try + { + file->write("test", 4); + } + catch (...) 
+ { + file->finalize(); + throw; + } } void checkReadAccess(const String & disk_name, IDisk & disk) From 61014dbde649048bbc3fe3b0c1cbd7f43cc58b7d Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 14 May 2022 18:41:54 +0200 Subject: [PATCH 147/615] Simlify strange test --- src/Disks/DiskObjectStorage.cpp | 3 +-- src/Disks/DiskObjectStorage.h | 2 +- src/Disks/IObjectStorage.cpp | 2 +- src/Disks/S3ObjectStorage.cpp | 5 ++--- tests/integration/test_merge_tree_s3_restore/test.py | 7 ++++--- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 2f0dad9e8b8..b2d2bf23652 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -14,7 +14,6 @@ #include #include #include -#include namespace DB @@ -1114,7 +1113,7 @@ void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_objec } -void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) +void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const { for (const auto & key : keys) { diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index d04bb3d0d04..a67cc78b82b 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -315,7 +315,7 @@ public: void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void readRestoreInformation(RestoreInformation & restore_information); void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys); + void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector 
& keys) const; void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); std::atomic revision_counter = 0; diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp index 44b9430172b..538cc702791 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/IObjectStorage.cpp @@ -35,7 +35,7 @@ void IObjectStorage::removeFromCache(const std::string & path) } } -void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) // NOLINT { if (&object_storage_to == this) copyObject(object_from, object_to, object_to_attributes); diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 94a9a42807a..3f26937a29b 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -287,7 +286,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons return result; } -void S3ObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) +void S3ObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object_from, const std::string & object_to, IObjectStorage & object_storage_to, std::optional object_to_attributes) // NOLINT { /// Shortcut for S3 if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) @@ -399,7 +398,7 @@ void S3ObjectStorage::copyObjectMultipartImpl(const String & src_bucket, const S } } -void S3ObjectStorage::copyObject(const std::string & object_from, const std::string & object_to, std::optional 
object_to_attributes) +void S3ObjectStorage::copyObject(const std::string & object_from, const std::string & object_to, std::optional object_to_attributes) // NOLINT { auto head = requestObjectHeadData(bucket, object_from).GetResult(); if (head.GetContentLength() >= static_cast(5UL * 1024 * 1024 * 1024)) diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index 6ae63db52ef..d9a58e5e5b1 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -88,6 +88,8 @@ def generate_values(date_str, count, sign=1): def create_table( node, table_name, attach=False, replicated=False, db_atomic=False, uuid="" ): + node.query("DROP DATABASE IF EXISTS s3") + node.query( "CREATE DATABASE IF NOT EXISTS s3 ENGINE = {engine}".format( engine="Atomic" if db_atomic else "Ordinary" @@ -501,8 +503,8 @@ def test_restore_mutations(cluster, db_atomic): ) == "({})".format(4096) -@pytest.mark.parametrize("db_atomic", [False, True]) -def test_migrate_to_restorable_schema(cluster, db_atomic): +def test_migrate_to_restorable_schema(cluster): + db_atomic = True node = cluster.instances["node_not_restorable"] create_table(node, "test", db_atomic=db_atomic) @@ -556,7 +558,6 @@ def test_migrate_to_restorable_schema(cluster, db_atomic): "SELECT sum(id) FROM s3.test FORMAT Values" ) == "({})".format(0) - @pytest.mark.parametrize("replicated", [False, True]) @pytest.mark.parametrize("db_atomic", [False, True]) def test_restore_to_detached(cluster, replicated, db_atomic): From f459dd5aba1a9df152d5ee33d6be4116f5a4d28c Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 16:46:49 +0000 Subject: [PATCH 148/615] change windowview engine behaviour --- src/Parsers/ASTCreateQuery.cpp | 6 + src/Parsers/ASTCreateQuery.h | 1 + src/Parsers/ParserCreateQuery.cpp | 16 +- src/Parsers/ParserCreateQuery.h | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 209 
+++++++++++------- src/Storages/WindowView/StorageWindowView.h | 5 +- .../01047_window_view_parser_inner_table.sql | 32 +-- .../0_stateless/01048_window_view_parser.sql | 36 +-- .../01050_window_view_parser_tumble.sql | 14 +- .../01051_window_view_parser_hop.sql | 14 +- .../01056_window_view_proc_hop_watch.py | 2 +- ..._window_view_event_hop_watch_strict_asc.py | 2 +- .../01062_window_view_event_hop_watch_asc.py | 2 +- .../01069_window_view_proc_tumble_watch.py | 2 +- .../01070_window_view_watch_events.py | 2 +- 15 files changed, 212 insertions(+), 133 deletions(-) diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 23881cd3fbb..c1d1e3dcc38 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -394,6 +394,12 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; //-V519 + if (inner_storage) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : ""); + inner_storage->formatImpl(settings, state, frame); + } + if (storage) storage->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 28a4b2a2932..596baa3eb3c 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -76,6 +76,7 @@ public: StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table. 
UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table + ASTStorage * inner_storage = nullptr; /// For window view with inner table ASTStorage * storage = nullptr; ASTPtr watermark_function; ASTPtr lateness_function; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 0c36aeb3141..657de8317fc 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -835,6 +835,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserStorage storage_p; + ParserStorage storage_inner; ParserTablePropertiesDeclarationList table_properties_p; ParserIntervalOperatorExpression watermark_p; ParserIntervalOperatorExpression lateness_p; @@ -844,6 +845,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ASTPtr to_table; ASTPtr columns_list; ASTPtr storage; + ASTPtr inner_storage; ASTPtr watermark; ASTPtr lateness; ASTPtr as_database; @@ -901,8 +903,17 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } - /// Inner table ENGINE for WINDOW VIEW - storage_p.parse(pos, storage, expected); + if (ParserKeyword{"INNER"}.ignore(pos, expected)) + { + /// Inner table ENGINE for WINDOW VIEW + storage_inner.parse(pos, inner_storage, expected); + } + + if (!to_table) + { + /// Target table ENGINE for WINDOW VIEW + storage_p.parse(pos, storage, expected); + } // WATERMARK if (ParserKeyword{"WATERMARK"}.ignore(pos, expected)) @@ -955,6 +966,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->set(query->columns_list, columns_list); query->set(query->storage, storage); + query->set(query->inner_storage, inner_storage); query->is_watermark_strictly_ascending = is_watermark_strictly_ascending; query->is_watermark_ascending = is_watermark_ascending; 
query->is_watermark_bounded = is_watermark_bounded; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index cdec7a70a5a..1148663fe01 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -391,7 +391,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [ENGINE [db.]name] [WATERMARK function] AS SELECT ... +/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [INNER ENGINE [db.]name] [ENGINE [db.]name] [WATERMARK function] [ALLOWED_LATENESS = interval_function] AS SELECT ... class ParserCreateWindowViewQuery : public IParserBase { protected: diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 13c6fca5163..e6f6f7652a5 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -349,6 +349,37 @@ namespace String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; + + static inline String generateInnerTableName(const StorageID & storage_id) + { + if (storage_id.hasUUID()) + return ".inner." + toString(storage_id.uuid); + return ".inner." + storage_id.getTableName(); + } + + static inline String generateTargetTableName(const StorageID & storage_id) + { + if (storage_id.hasUUID()) + return ".inner.target." + toString(storage_id.uuid); + return ".inner.target." 
+ storage_id.table_name; + } + + static ASTPtr generateInnerFetchQuery(StorageID inner_table_id) + { + auto fetch_query = std::make_shared(); + auto select = std::make_shared(); + select->children.push_back(std::make_shared()); + fetch_query->setExpression(ASTSelectQuery::Expression::SELECT, select); + fetch_query->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); + auto tables_elem = std::make_shared(); + auto table_expr = std::make_shared(); + fetch_query->tables()->children.push_back(tables_elem); + tables_elem->table_expression = table_expr; + tables_elem->children.push_back(table_expr); + table_expr->database_and_table_name = std::make_shared(inner_table_id); + table_expr->children.push_back(table_expr->database_and_table_name); + return fetch_query; + } } static void extractDependentTable(ContextPtr context, ASTPtr & query, String & select_database_name, String & select_table_name) @@ -461,15 +492,52 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) { UInt32 w_start = addTime(watermark, window_kind, -window_num_units, *time_zone); + auto inner_storage = getInnerStorage(); InterpreterSelectQuery fetch( - getFetchColumnQuery(w_start, watermark), + inner_fetch_query, getContext(), - getInnerStorage(), - nullptr, + inner_storage, + inner_storage->getInMemoryMetadataPtr(), SelectQueryOptions(QueryProcessingStage::FetchColumns)); auto builder = fetch.buildQueryPipeline(); + ASTPtr filter_function; + if (is_tumble) + { + /// SELECT * FROM inner_table WHERE window_id_name == w_end + /// (because we fire at the end of windows) + filter_function = makeASTFunction("equals", std::make_shared(window_id_name), std::make_shared(watermark)); + } + else + { + auto func_array = makeASTFunction("array"); + auto w_end = watermark; + while (w_start < w_end) + { + /// slice_num_units = std::gcd(hop_num_units, window_num_units); + /// We use std::gcd(hop_num_units, window_num_units) as the new window size + /// to split the overlapped windows into 
non-overlapped. + /// For a hopping window with window_size=3 slice=1, the windows might be + /// [1,3],[2,4],[3,5], which will cause recomputation. + /// In this case, the slice_num_units will be `gcd(1,3)=1' and the non-overlapped + /// windows will split into [1], [2], [3]... We compute each split window into + /// mergeable state and merge them when the window is triggering. + func_array ->arguments->children.push_back(std::make_shared(w_end)); + w_end = addTime(w_end, window_kind, -slice_num_units, *time_zone); + } + filter_function = makeASTFunction("has", func_array, std::make_shared(window_id_name)); + } + + auto syntax_result = TreeRewriter(getContext()).analyze(filter_function, builder.getHeader().getNamesAndTypesList()); + auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, getContext()).getActionsDAG(false); + + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared( + header, std::make_shared(filter_expression), filter_function->getColumnName(), true); + }); + /// Adding window column DataTypes window_column_type{std::make_shared(), std::make_shared()}; ColumnWithTypeAndName column; @@ -565,10 +633,15 @@ inline void StorageWindowView::fire(UInt32 watermark) BlocksPtr blocks; Block header; + try { std::lock_guard lock(mutex); std::tie(blocks, header) = getNewBlocks(watermark); } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } for (const auto & block : *blocks) { @@ -689,33 +762,30 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( }; auto new_storage = std::make_shared(); - /// storage != nullptr in case create window view with ENGINE syntax + /// storage != nullptr in case create window view with INNER ENGINE syntax if (storage) { - new_storage->set(new_storage->engine, storage->engine->clone()); - if (storage->ttl_table) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "TTL is not supported for inner table in Window View"); - if (!endsWith(storage->engine->name, "MergeTree")) - throw Exception( - ErrorCodes::INCORRECT_QUERY, - "The ENGINE of WindowView must be MergeTree family of table engines " - "including the engines with replication support"); + new_storage->set(new_storage->engine, storage->engine->clone()); - if (storage->partition_by) - new_storage->set(new_storage->partition_by, visit(storage->partition_by)); - if (storage->primary_key) - new_storage->set(new_storage->primary_key, visit(storage->primary_key)); - if (storage->order_by) - new_storage->set(new_storage->order_by, visit(storage->order_by)); - if (storage->sample_by) - new_storage->set(new_storage->sample_by, visit(storage->sample_by)); + if (endsWith(storage->engine->name, "MergeTree")) + { + if (storage->partition_by) + new_storage->set(new_storage->partition_by, visit(storage->partition_by)); + if (storage->primary_key) + new_storage->set(new_storage->primary_key, visit(storage->primary_key)); + if (storage->order_by) + new_storage->set(new_storage->order_by, visit(storage->order_by)); + if (storage->sample_by) + new_storage->set(new_storage->sample_by, visit(storage->sample_by)); - if (storage->settings) - new_storage->set(new_storage->settings, storage->settings->clone()); + if (storage->settings) + new_storage->set(new_storage->settings, storage->settings->clone()); + } } else { @@ -1052,6 +1122,13 @@ 
StorageWindowView::StorageWindowView( if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); + /// If the target table is not set, use inner target table + inner_target_table = query.to_table_id.empty(); + if (inner_target_table && !query.storage) + throw Exception( + "You must specify where to save results of a WindowView query: either ENGINE or an existing table in a TO clause", + ErrorCodes::INCORRECT_QUERY); + if (query.select->list_of_selects->children.size() != 1) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, @@ -1095,7 +1172,6 @@ StorageWindowView::StorageWindowView( is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; is_watermark_ascending = query.is_watermark_ascending; is_watermark_bounded = query.is_watermark_bounded; - target_table_id = query.to_table_id; /// Extract information about watermark, lateness. eventTimeParser(query); @@ -1105,29 +1181,52 @@ StorageWindowView::StorageWindowView( else window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "hop"); - auto generate_inner_table_name = [](const StorageID & storage_id) - { - if (storage_id.hasUUID()) - return ".inner." + toString(storage_id.uuid); - return ".inner." 
+ storage_id.table_name; - }; - if (attach_) { - inner_table_id = StorageID(table_id_.database_name, generate_inner_table_name(table_id_)); + inner_table_id = StorageID(table_id_.database_name, generateInnerTableName(table_id_)); + if (inner_target_table) + target_table_id = StorageID(table_id_.database_name, generateTargetTableName(table_id_)); + else + target_table_id = query.to_table_id; } else { + /// create inner table auto inner_create_query - = getInnerTableCreateQuery(inner_query, query.storage, table_id_.database_name, generate_inner_table_name(table_id_)); + = getInnerTableCreateQuery(inner_query, query.inner_storage, table_id_.database_name, generateInnerTableName(table_id_)); auto create_context = Context::createCopy(context_); InterpreterCreateQuery create_interpreter(inner_create_query, create_context); create_interpreter.setInternal(true); create_interpreter.execute(); inner_table_id = StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()); + + if (inner_target_table) + { + /// create inner target table + auto create_context = Context::createCopy(context_); + auto target_create_query = std::make_shared(); + target_create_query->setDatabase(table_id_.database_name); + target_create_query->setTable(generateTargetTableName(table_id_)); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); + + target_create_query->set(target_create_query->columns_list, new_columns_list); + target_create_query->set(target_create_query->storage, query.storage->ptr()); + + InterpreterCreateQuery create_interpreter(target_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + target_table_id = StorageID(target_create_query->getDatabase(), target_create_query->getTable()); + } + else + target_table_id = query.to_table_id; } + inner_fetch_query = generateInnerFetchQuery(inner_table_id); + clean_interval_ms = 
getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds(); next_fire_signal = getWindowUpperBound(std::time(nullptr)); @@ -1463,6 +1562,9 @@ void StorageWindowView::dropInnerTableIfAny(bool no_delay, ContextPtr local_cont { InterpreterDropQuery::executeDropQuery( ASTDropQuery::Kind::Drop, getContext(), local_context, inner_table_id, no_delay); + + if (inner_target_table) + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target_table_id, no_delay); } catch (...) { @@ -1497,51 +1599,6 @@ StoragePtr StorageWindowView::getInnerStorage() const return DatabaseCatalog::instance().getTable(inner_table_id, getContext()); } -ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const -{ - auto res_query = std::make_shared(); - auto select = std::make_shared(); - select->children.push_back(std::make_shared()); - res_query->setExpression(ASTSelectQuery::Expression::SELECT, select); - res_query->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); - auto tables_elem = std::make_shared(); - auto table_expr = std::make_shared(); - res_query->tables()->children.push_back(tables_elem); - tables_elem->table_expression = table_expr; - tables_elem->children.push_back(table_expr); - table_expr->database_and_table_name = std::make_shared(inner_table_id); - table_expr->children.push_back(table_expr->database_and_table_name); - - if (is_tumble) - { - /// SELECT * FROM inner_table PREWHERE window_id_name == w_end - /// (because we fire at the end of windows) - auto func_equals = makeASTFunction("equals", std::make_shared(window_id_name), std::make_shared(w_end)); - res_query->setExpression(ASTSelectQuery::Expression::PREWHERE, func_equals); - } - else - { - auto func_array = makeASTFunction("array"); - while (w_start < w_end) - { - /// slice_num_units = std::gcd(hop_num_units, window_num_units); - /// We use std::gcd(hop_num_units, window_num_units) as the new window size - /// to 
split the overlapped windows into non-overlapped. - /// For a hopping window with window_size=3 slice=1, the windows might be - /// [1,3],[2,4],[3,5], which will cause recomputation. - /// In this case, the slice_num_units will be `gcd(1,3)=1' and the non-overlapped - /// windows will split into [1], [2], [3]... We compute each split window into - /// mergeable state and merge them when the window is triggering. - func_array ->arguments->children.push_back(std::make_shared(w_end)); - w_end = addTime(w_end, window_kind, -slice_num_units, *time_zone); - } - auto func_has = makeASTFunction("has", func_array, std::make_shared(window_id_name)); - res_query->setExpression(ASTSelectQuery::Expression::PREWHERE, func_has); - } - - return res_query; -} - StoragePtr StorageWindowView::getTargetStorage() const { return DatabaseCatalog::instance().getTable(target_table_id, getContext()); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 101d29d1ae7..83b8df1a554 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -180,11 +180,15 @@ private: /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *) ASTPtr final_query; + /// Used to fetch the data from inner storage. 
+ ASTPtr inner_fetch_query; + bool is_proctime{true}; bool is_time_column_func_now; bool is_tumble; // false if is hop std::atomic shutdown_called{false}; bool has_inner_table{true}; + bool inner_target_table{false}; mutable Block sample_block; UInt64 clean_interval_ms; const DateLUTImpl * time_zone = nullptr; @@ -253,7 +257,6 @@ private: void updateMaxTimestamp(UInt32 timestamp); ASTPtr getFinalQuery() const { return final_query->clone(); } - ASTPtr getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const; StoragePtr getParentStorage() const; diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index 3e4f95d098e..8b978e6094c 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -14,46 +14,46 @@ SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, 
tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY id AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT 
count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT 
count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; @@ -61,46 +61,46 @@ SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY id AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory 
AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW 
test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp 
GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE test_01047.wv; diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index e17352205e3..a54058ce41b 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -13,90 +13,90 @@ CREATE TABLE test_01048.mt_2(a Int32, b Int32, timestamp DateTime) ENGINE=MergeT SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumbleEnd(wid) as wend FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL 1 SECOND) as wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, tumbleEnd(wid) as wend FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL 1 SECOND) as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, 
b; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, b; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT 
count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT 
count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); +CREATE WINDOW VIEW test_01048.wv ENGINE 
Memory AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE test_01048.wv; diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 54f9ed00cbe..d9604bb2b52 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -6,28 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; 
+CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), 
INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 0f705d5c911..472dc66f1a2 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -6,28 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE 
IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 
be139c153aa..5aece08fa74 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -39,7 +39,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01056_window_view_proc_hop_watch.wv AS SELECT count(a) AS count FROM 01056_window_view_proc_hop_watch.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW 01056_window_view_proc_hop_watch.wv ENGINE Memory AS SELECT count(a) AS count FROM 01056_window_view_proc_hop_watch.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index f5024cb11ab..ff950b93a99 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -35,7 +35,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW db_01059_event_hop_watch_strict_asc.wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, hopEnd(wid) as w_end FROM db_01059_event_hop_watch_strict_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW db_01059_event_hop_watch_strict_asc.wv ENGINE Memory WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, hopEnd(wid) as w_end FROM db_01059_event_hop_watch_strict_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index 7f23e983ba2..02210f15e25 100755 --- 
a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -39,7 +39,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index 4c675fcabfb..24a6ccd157e 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -39,7 +39,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01069_window_view_proc_tumble_watch.wv AS SELECT count(a) AS count FROM 01069_window_view_proc_tumble_watch.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW 01069_window_view_proc_tumble_watch.wv ENGINE Memory AS SELECT count(a) AS count FROM 01069_window_view_proc_tumble_watch.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index 2bf732d68e5..c4d36bc51a2 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ 
b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -39,7 +39,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01070_window_view_watch_events.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM 01070_window_view_watch_events.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01070_window_view_watch_events.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM 01070_window_view_watch_events.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) From 3146308ab54c76e87946a266340f52968a0447b8 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 16:47:47 +0000 Subject: [PATCH 149/615] add test --- ...w_view_inner_table_memory_tumble.reference | 7 ++++ ...9_window_view_inner_table_memory_tumble.sh | 34 +++++++++++++++++++ ...ndow_view_inner_table_memory_hop.reference | 6 ++++ ...1080_window_view_inner_table_memory_hop.sh | 34 +++++++++++++++++++ ..._window_view_target_table_engine.reference | 7 ++++ .../01081_window_view_target_table_engine.sh | 31 +++++++++++++++++ 6 files changed, 119 insertions(+) create mode 100644 tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.reference create mode 100755 tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.sh create mode 100644 tests/queries/0_stateless/01080_window_view_inner_table_memory_hop.reference create mode 100755 tests/queries/0_stateless/01080_window_view_inner_table_memory_hop.sh create mode 100644 tests/queries/0_stateless/01081_window_view_target_table_engine.reference create mode 100755 tests/queries/0_stateless/01081_window_view_target_table_engine.sh diff --git a/tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.reference b/tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.reference new file 
mode 100644 index 00000000000..de722f47f08 --- /dev/null +++ b/tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.reference @@ -0,0 +1,7 @@ +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 diff --git a/tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.sh b/tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.sh new file mode 100755 index 00000000000..1d47889ea1a --- /dev/null +++ b/tests/queries/0_stateless/01079_window_view_inner_table_memory_tumble.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < Date: Sat, 14 May 2022 18:53:30 +0200 Subject: [PATCH 150/615] Fix BLACK --- tests/integration/test_merge_tree_s3_restore/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index d9a58e5e5b1..e6ca4a78c25 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -558,6 +558,7 @@ def test_migrate_to_restorable_schema(cluster): "SELECT sum(id) FROM s3.test FORMAT Values" ) == "({})".format(0) + @pytest.mark.parametrize("replicated", [False, True]) @pytest.mark.parametrize("db_atomic", [False, True]) def test_restore_to_detached(cluster, replicated, db_atomic): From d7f2be1b29b63887a7c8b8476d46eeb3e4fae034 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 14 May 2022 17:49:51 +0000 Subject: [PATCH 151/615] update test --- .../queries/0_stateless/01078_window_view_alter_query_watch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py 
b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index bfd71146e1f..9e4625281d7 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -28,6 +28,8 @@ with client(name="client1>", log=log) as client1, client( client2.expect(prompt) client3.send("SET allow_experimental_window_view = 1") client3.expect(prompt) + client3.send("SET window_view_heartbeat_interval = 1") + client3.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01078_window_view_alter_query_watch") client1.expect(prompt) From 4ed79cd9e17f5ad7ec59e52533211e158961596f Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 06:45:25 +0000 Subject: [PATCH 152/615] fix windowview watch --- src/Storages/WindowView/StorageWindowView.cpp | 9 +++-- src/Storages/WindowView/WindowViewSource.h | 34 +++++++++---------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8dbaaa295aa..011376db00d 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -638,7 +638,9 @@ inline void StorageWindowView::fire(UInt32 watermark) if (auto watch_stream_ptr = watch_stream.lock()) watch_stream_ptr->addBlock(block, watermark); } + fire_condition.notify_all(); } + if (!target_table_id.empty()) { StoragePtr target_table = getTargetStorage(); @@ -1495,11 +1497,8 @@ void StorageWindowView::shutdown() { shutdown_called = true; - { - std::lock_guard lock(mutex); - fire_condition.notify_all(); - fire_signal_condition.notify_all(); - } + fire_condition.notify_all(); + fire_signal_condition.notify_all(); clean_cache_task->deactivate(); fire_task->deactivate(); diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index 7b914933035..e8d19954c18 100644 --- a/src/Storages/WindowView/WindowViewSource.h 
+++ b/src/Storages/WindowView/WindowViewSource.h @@ -26,7 +26,7 @@ public: , window_view_timezone(window_view_timezone_) , has_limit(has_limit_) , limit(limit_) - , heartbeat_interval_sec(heartbeat_interval_sec_) + , heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000) { if (is_events) header.insert( @@ -83,29 +83,28 @@ protected: return {getHeader(), 0}; } - storage->fire_condition.wait_for(lock, std::chrono::seconds(heartbeat_interval_sec)); - - if (isCancelled() || storage->shutdown_called) + while((Poco::Timestamp().epochMicroseconds() - last_heartbeat_timestamp_usec) < heartbeat_interval_usec) { - return {Block(), 0}; - } - - if (blocks_with_watermark.empty()) - return {getHeader(), 0}; - else - { - end_of_blocks = false; - auto res = blocks_with_watermark.front(); - blocks_with_watermark.pop_front(); - return res; + bool signaled = std::cv_status::no_timeout == storage->fire_condition.wait_for(lock, std::chrono::microseconds(1000)); + if (signaled) + break; + if (isCancelled() || storage->shutdown_called) + return {Block(), 0}; } } - else + + if (!blocks_with_watermark.empty()) { + end_of_blocks = false; auto res = blocks_with_watermark.front(); blocks_with_watermark.pop_front(); return res; } + else + { + last_heartbeat_timestamp_usec = static_cast(Poco::Timestamp().epochMicroseconds()); + return {getHeader(), 0}; + } } private: @@ -121,6 +120,7 @@ private: Int64 num_updates = -1; bool end_of_blocks = false; std::mutex blocks_mutex; - UInt64 heartbeat_interval_sec; + UInt64 heartbeat_interval_usec; + UInt64 last_heartbeat_timestamp_usec = 0; }; } From 65779adf8bc134960123ab163cfa01ac2206a3e8 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 07:07:27 +0000 Subject: [PATCH 153/615] update code style --- src/Storages/WindowView/WindowViewSource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index e8d19954c18..dc47e3fb878 100644 --- 
a/src/Storages/WindowView/WindowViewSource.h +++ b/src/Storages/WindowView/WindowViewSource.h @@ -83,7 +83,7 @@ protected: return {getHeader(), 0}; } - while((Poco::Timestamp().epochMicroseconds() - last_heartbeat_timestamp_usec) < heartbeat_interval_usec) + while ((Poco::Timestamp().epochMicroseconds() - last_heartbeat_timestamp_usec) < heartbeat_interval_usec) { bool signaled = std::cv_status::no_timeout == storage->fire_condition.wait_for(lock, std::chrono::microseconds(1000)); if (signaled) From 528e21ebf47ef27d03ed06841a83f24ba227b3b7 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 09:14:01 +0000 Subject: [PATCH 154/615] add test for windowview watch limit --- .../01082_window_view_watch_limit.py | 68 +++++++++++++++++++ .../01082_window_view_watch_limit.reference | 0 2 files changed, 68 insertions(+) create mode 100755 tests/queries/0_stateless/01082_window_view_watch_limit.py create mode 100644 tests/queries/0_stateless/01082_window_view_watch_limit.reference diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py new file mode 100755 index 00000000000..89286fb6ed7 --- /dev/null +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +# Tags: no-parallel + +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from client import client, prompt, end_of_block + +log = None +# uncomment the line below for debugging +# log=sys.stdout + +with client(name="client1>", log=log) as client1, client( + name="client2>", log=log +) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send("SET allow_experimental_window_view = 1") + client1.expect(prompt) + client1.send("SET window_view_heartbeat_interval = 1") + client1.expect(prompt) + client2.send("SET allow_experimental_window_view = 1") + client2.expect(prompt) + + 
client1.send("CREATE DATABASE 01082_window_view_watch_limit") + client1.expect(prompt) + client1.send("DROP TABLE IF EXISTS 01082_window_view_watch_limit.mt") + client1.expect(prompt) + client1.send("DROP TABLE IF EXISTS 01082_window_view_watch_limit.wv NO DELAY") + client1.expect(prompt) + + client1.send( + "CREATE TABLE 01082_window_view_watch_limit.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + ) + client1.expect(prompt) + client1.send( + "CREATE WINDOW VIEW 01082_window_view_watch_limit.wv AS SELECT count(a) AS count FROM 01082_window_view_watch_limit.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + ) + client1.expect("Ok.") + client1.expect(prompt) + + client1.send("WATCH 01082_window_view_watch_limit.wv LIMIT 2") + client1.expect("Query id" + end_of_block) + client2.send( + "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, now('US/Samoa') + 3)" + ) + client2.expect("Ok.") + client1.expect("1" + end_of_block) + client1.expect("Progress: 1.00 rows.*\)") + client2.send( + "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, now('US/Samoa') + 3)" + ) + client2.expect("Ok.") + client1.expect("1" + end_of_block) + client1.expect("Progress: 1.00 rows.*\)") + client1.expect("2 row" + end_of_block) + client1.expect(prompt) + + client1.send("DROP TABLE 01082_window_view_watch_limit.wv NO DELAY") + client1.expect(prompt) + client1.send("DROP TABLE 01082_window_view_watch_limit.mt") + client1.expect(prompt) + client1.send("DROP DATABASE IF EXISTS 01082_window_view_watch_limit") + client1.expect(prompt) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.reference b/tests/queries/0_stateless/01082_window_view_watch_limit.reference new file mode 100644 index 00000000000..e69de29bb2d From 6c2d78b8022abd5da8f8df7df17104bcfaabf439 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 11:18:23 +0000 Subject: [PATCH 155/615] rename var --- src/Storages/WindowView/StorageWindowView.cpp | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index cab07b12858..5528ecbff16 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -421,8 +421,8 @@ UInt32 StorageWindowView::getCleanupBound() ASTPtr StorageWindowView::getCleanupQuery() { - ASTPtr function_equal; - function_equal = makeASTFunction( + ASTPtr function_less; + function_less= makeASTFunction( "less", std::make_shared(window_id_name), std::make_shared(getCleanupBound())); @@ -436,7 +436,7 @@ ASTPtr StorageWindowView::getCleanupQuery() auto alter_command = std::make_shared(); alter_command->type = ASTAlterCommand::DELETE; - alter_command->predicate = function_equal; + alter_command->predicate = function_less; alter_command->children.push_back(alter_command->predicate); alter_query->command_list->children.push_back(alter_command); return alter_query; From 033b1e7f8267537403ad7f040587ed46342b1e90 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 11:18:42 +0000 Subject: [PATCH 156/615] add lock for cleanup query --- src/Storages/WindowView/StorageWindowView.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 5528ecbff16..dfc15452c08 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -911,7 +911,10 @@ inline void StorageWindowView::cleanup() auto cleanup_context = Context::createCopy(getContext()); cleanup_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; InterpreterAlterQuery alter_query(getCleanupQuery(), cleanup_context); - alter_query.execute(); + { + std::lock_guard lock(mutex); + alter_query.execute(); + } std::lock_guard lock(fire_signal_mutex); watch_streams.remove_if([](std::weak_ptr & ptr) { return ptr.expired(); }); 
From 3f64814544b23d89dc21b51948ccb381315db9db Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 11:37:40 +0000 Subject: [PATCH 157/615] update test --- .../01078_window_view_alter_query_watch.py | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 9e4625281d7..96abfda12c9 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -39,28 +39,22 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send( - "CREATE TABLE 01078_window_view_alter_query_watch.mt(a Int32, timestamp DateTime('US/Samoa')) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE 01078_window_view_alter_query_watch.mt(a Int32) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv AS SELECT count(a) AS count FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) - client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:00');" - ) - client2.expect("Ok.") - client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:06');" - ) + client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1)") client2.expect("Ok.") client1.expect("1" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") 
client2.send( - "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, tumbleEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" + "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) AS count FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid;" ) client2.expect("Ok.") client2.expect(prompt) @@ -68,13 +62,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client3.send("WATCH 01078_window_view_alter_query_watch.wv") client3.expect("Query id" + end_of_block) - client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:11');" - ) - client2.expect("Ok.") - client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:18');" - ) + client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1)") client2.expect("Ok.") client3.expect("2" + end_of_block) client3.expect("Progress: 1.00 rows.*\)") From ea62dc4d0a8f5e1e2c96c549cba630304dce288b Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 16:05:19 +0000 Subject: [PATCH 158/615] use event time in test --- .../0_stateless/01082_window_view_watch_limit.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 89286fb6ed7..cfd7cd415ad 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -38,26 +38,24 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01082_window_view_watch_limit.wv AS SELECT count(a) AS count FROM 01082_window_view_watch_limit.mt GROUP BY tumble(timestamp, INTERVAL '1' 
SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW 01082_window_view_watch_limit.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01082_window_view_watch_limit.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect("Ok.") client1.expect(prompt) - client1.send("WATCH 01082_window_view_watch_limit.wv LIMIT 2") + client1.send("WATCH 01082_window_view_watch_limit.wv LIMIT 1") client1.expect("Query id" + end_of_block) client2.send( - "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, now('US/Samoa') + 3)" + "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, '1990/01/01 12:00:00');" ) client2.expect("Ok.") - client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") client2.send( - "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, now('US/Samoa') + 3)" + "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, '1990/01/01 12:00:05');" ) client2.expect("Ok.") client1.expect("1" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") - client1.expect("2 row" + end_of_block) + client1.expect("1 row" + end_of_block) client1.expect(prompt) client1.send("DROP TABLE 01082_window_view_watch_limit.wv NO DELAY") From 93fec4ebc729de14ac4b00253869a5ad0294bf46 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 16:12:24 +0000 Subject: [PATCH 159/615] remove dependency when drop windowview --- src/Storages/WindowView/StorageWindowView.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 13c6fca5163..ad216b79ba6 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1449,6 +1449,8 @@ void StorageWindowView::checkTableCanBeDropped() const void StorageWindowView::drop() { + DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID()); + /// Must be guaranteed at this point for 
database engine Atomic that has_inner_table == false, /// because otherwise will be a deadlock. dropInnerTableIfAny(true, getContext()); From 77ad48fee9f03de90234c8e8fbcee02fbf5f6fd3 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 17:40:42 +0000 Subject: [PATCH 160/615] fix build --- src/Storages/WindowView/StorageWindowView.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e6f6f7652a5..bb5e78fab9a 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -350,21 +350,21 @@ namespace String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; - static inline String generateInnerTableName(const StorageID & storage_id) + String generateInnerTableName(const StorageID & storage_id) { if (storage_id.hasUUID()) return ".inner." + toString(storage_id.uuid); return ".inner." + storage_id.getTableName(); } - static inline String generateTargetTableName(const StorageID & storage_id) + String generateTargetTableName(const StorageID & storage_id) { if (storage_id.hasUUID()) return ".inner.target." + toString(storage_id.uuid); return ".inner.target." 
+ storage_id.table_name; } - static ASTPtr generateInnerFetchQuery(StorageID inner_table_id) + ASTPtr generateInnerFetchQuery(StorageID inner_table_id) { auto fetch_query = std::make_shared(); auto select = std::make_shared(); From ba472c97b2fc7fe460298b8a2283fdc62fb79d6a Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 17:48:01 +0000 Subject: [PATCH 161/615] update tests --- tests/queries/0_stateless/01056_window_view_proc_hop_watch.py | 4 +--- .../01059_window_view_event_hop_watch_strict_asc.py | 2 +- .../0_stateless/01062_window_view_event_hop_watch_asc.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 5aece08fa74..772ac04b287 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -25,14 +25,12 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("CREATE DATABASE 01056_window_view_proc_hop_watch") + client1.send("CREATE DATABASE IF NOT EXISTS 01056_window_view_proc_hop_watch") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01056_window_view_proc_hop_watch.mt") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01056_window_view_proc_hop_watch.wv") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS `.inner.wv`") - client1.expect(prompt) client1.send( "CREATE TABLE 01056_window_view_proc_hop_watch.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index ff950b93a99..75314d1ca84 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ 
b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -23,7 +23,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("SET window_view_heartbeat_interval = 1") client1.expect(prompt) - client1.send("CREATE DATABASE db_01059_event_hop_watch_strict_asc") + client1.send("CREATE DATABASE IF NOT EXISTS db_01059_event_hop_watch_strict_asc") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS db_01059_event_hop_watch_strict_asc.mt") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index 02210f15e25..57ef6f98165 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -25,7 +25,7 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("CREATE DATABASE 01062_window_view_event_hop_watch_asc") + client1.send("CREATE DATABASE IF NOT EXISTS 01062_window_view_event_hop_watch_asc") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.mt") client1.expect(prompt) From af3e13541abb37972982c55d25280aa40895ff4b Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 15 May 2022 17:56:53 +0000 Subject: [PATCH 162/615] update test --- tests/queries/0_stateless/01082_window_view_watch_limit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index cfd7cd415ad..99808bc2cf7 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -26,7 +26,7 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") 
client2.expect(prompt) - client1.send("CREATE DATABASE 01082_window_view_watch_limit") + client1.send("CREATE DATABASE IF NOT EXISTS 01082_window_view_watch_limit") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01082_window_view_watch_limit.mt") client1.expect(prompt) From 4f1d4dde2236dcd1f301ac64c477af6cbba693fe Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 May 2022 00:16:00 +0200 Subject: [PATCH 163/615] Disable s3 logging by default --- src/Core/Settings.h | 1 + src/Disks/S3/diskSettings.cpp | 3 +- src/IO/S3/PocoHTTPClient.cpp | 52 +++++++++++++++++++++-------------- src/IO/S3/PocoHTTPClient.h | 6 ++-- src/IO/S3Common.cpp | 23 +++++++++++----- src/IO/S3Common.h | 4 ++- src/Storages/StorageS3.cpp | 3 +- 7 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2e6d657698c..505a519aeca 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -86,6 +86,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ + M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. 
Make sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp index 579f160abd4..4ff322b5b54 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/S3/diskSettings.cpp @@ -95,7 +95,8 @@ std::unique_ptr getClient(const Poco::Util::AbstractConfigura { S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), - context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects); + context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects, + context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging); S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 9aacbda3fbf..aff1b9db6fd 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -49,10 +49,12 @@ namespace DB::S3 PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( const String & force_region_, const RemoteHostFilter & remote_host_filter_, - unsigned int s3_max_redirects_) + unsigned int s3_max_redirects_, + bool enable_s3_requests_logging_) : force_region(force_region_) , remote_host_filter(remote_host_filter_) , s3_max_redirects(s3_max_redirects_) + , enable_s3_requests_logging(enable_s3_requests_logging_) { } @@ -87,16 +89,17 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() } -PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration) - 
: per_request_configuration(clientConfiguration.perRequestConfiguration) - , error_report(clientConfiguration.error_report) +PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) + : per_request_configuration(client_configuration.perRequestConfiguration) + , error_report(client_configuration.error_report) , timeouts(ConnectionTimeouts( - Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout. - Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout. - Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000) /// receive timeout. + Poco::Timespan(client_configuration.connectTimeoutMs * 1000), /// connection timeout. + Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// send timeout. + Poco::Timespan(client_configuration.requestTimeoutMs * 1000) /// receive timeout. )) - , remote_host_filter(clientConfiguration.remote_host_filter) - , s3_max_redirects(clientConfiguration.s3_max_redirects) + , remote_host_filter(client_configuration.remote_host_filter) + , s3_max_redirects(client_configuration.s3_max_redirects) + , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) { } @@ -119,7 +122,8 @@ void PocoHTTPClient::makeRequestInternal( Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); - LOG_TEST(log, "Make request to: {}", uri); + if (enable_s3_requests_logging) + LOG_TEST(log, "Make request to: {}", uri); enum class S3MetricType { @@ -251,7 +255,8 @@ void PocoHTTPClient::makeRequestInternal( if (request.GetContentBody()) { - LOG_TEST(log, "Writing request body."); + if (enable_s3_requests_logging) + LOG_TEST(log, "Writing request body."); if (attempt > 0) /// rewind content body buffer. 
{ @@ -259,24 +264,28 @@ void PocoHTTPClient::makeRequestInternal( request.GetContentBody()->seekg(0); } auto size = Poco::StreamCopier::copyStream(*request.GetContentBody(), request_body_stream); - LOG_TEST(log, "Written {} bytes to request body", size); + if (enable_s3_requests_logging) + LOG_TEST(log, "Written {} bytes to request body", size); } - LOG_TEST(log, "Receiving response..."); + if (enable_s3_requests_logging) + LOG_TEST(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); watch.stop(); ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds()); int status_code = static_cast(poco_response.getStatus()); - LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); + if (enable_s3_requests_logging) + LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT) { auto location = poco_response.get("location"); remote_host_filter.checkURL(Poco::URI(location)); uri = location; - LOG_TEST(log, "Redirecting request to new location: {}", location); + if (enable_s3_requests_logging) + LOG_TEST(log, "Redirecting request to new location: {}", location); ProfileEvents::increment(select_metric(S3MetricType::Redirects)); @@ -286,13 +295,16 @@ void PocoHTTPClient::makeRequestInternal( response->SetResponseCode(static_cast(status_code)); response->SetContentType(poco_response.getContentType()); - WriteBufferFromOwnString headers_ss; - for (const auto & [header_name, header_value] : poco_response) + if (enable_s3_requests_logging) { - response->AddHeader(header_name, header_value); - headers_ss << header_name << ": " << header_value << "; "; + WriteBufferFromOwnString headers_ss; + for (const auto & [header_name, header_value] : poco_response) + { + response->AddHeader(header_name, header_value); + headers_ss << header_name << ": " << header_value << "; "; + } + 
LOG_TEST(log, "Received headers: {}", headers_ss.str()); } - LOG_TEST(log, "Received headers: {}", headers_ss.str()); if (status_code == 429 || status_code == 503) { // API throttling diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index defd029f05a..3897097b196 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -32,13 +32,14 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; + bool enable_s3_requests_logging; void updateSchemeAndRegion(); std::function error_report; private: - PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_); + PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, bool enable_s3_requests_logging_); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. 
friend ClientFactory; @@ -79,7 +80,7 @@ private: class PocoHTTPClient : public Aws::Http::HttpClient { public: - explicit PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration); + explicit PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration); ~PocoHTTPClient() override = default; std::shared_ptr MakeRequest( @@ -99,6 +100,7 @@ private: ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; + bool enable_s3_requests_logging; }; } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 3732b662ea2..95e67d5d6d6 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -61,7 +61,8 @@ const std::pair & convertLogLevel(Aws::U class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface { public: - AWSLogger() + explicit AWSLogger(bool enable_s3_requests_logging_) + :enable_s3_requests_logging(enable_s3_requests_logging_) { for (auto [tag, name] : S3_LOGGER_TAG_NAMES) tag_loggers[tag] = &Poco::Logger::get(name); @@ -71,7 +72,13 @@ public: ~AWSLogger() final = default; - Aws::Utils::Logging::LogLevel GetLogLevel() const final { return Aws::Utils::Logging::LogLevel::Trace; } + Aws::Utils::Logging::LogLevel GetLogLevel() const final + { + if (enable_s3_requests_logging) + return Aws::Utils::Logging::LogLevel::Trace; + else + return Aws::Utils::Logging::LogLevel::Info; + } void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) final // NOLINT { @@ -100,6 +107,7 @@ public: private: Poco::Logger * default_logger; + bool enable_s3_requests_logging; std::unordered_map tag_loggers; }; @@ -535,7 +543,7 @@ public: /// AWS API tries credentials providers one by one. Some of providers (like ProfileConfigFileAWSCredentialsProvider) can be /// quite verbose even if nobody configured them. So we use our provider first and only after it use default providers. 
{ - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging); AddProvider(std::make_shared(aws_client_configuration)); } @@ -572,7 +580,7 @@ public: } else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true") { - DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects); + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging); /// See MakeDefaultHttpResourceClientConfiguration(). 
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside @@ -692,7 +700,7 @@ namespace S3 { aws_options = Aws::SDKOptions{}; Aws::InitAPI(aws_options); - Aws::Utils::Logging::InitializeAWSLogging(std::make_shared()); + Aws::Utils::Logging::InitializeAWSLogging(std::make_shared(false)); Aws::Http::SetHttpClientFactory(std::make_shared()); } @@ -756,9 +764,10 @@ namespace S3 PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT const String & force_region, const RemoteHostFilter & remote_host_filter, - unsigned int s3_max_redirects) + unsigned int s3_max_redirects, + bool enable_s3_requestrs_logging) { - return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects); + return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requestrs_logging); } URI::URI(const Poco::URI & uri_) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 01f77cff820..98471f5b81f 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -44,12 +44,14 @@ public: PocoHTTPClientConfiguration createClientConfiguration( const String & force_region, const RemoteHostFilter & remote_host_filter, - unsigned int s3_max_redirects); + unsigned int s3_max_redirects, + bool enable_s3_requestrs_logging); private: ClientFactory(); Aws::SDKOptions aws_options; + std::atomic s3_requests_logging_enabled; }; /** diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2ab553ad450..c981a055ab6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -878,7 +878,8 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, - ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects); + ctx->getRemoteHostFilter(), 
ctx->getGlobalContext()->getSettingsRef().s3_max_redirects, + ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging); client_configuration.endpointOverride = upd.uri.endpoint; client_configuration.maxConnections = upd.rw_settings.max_connections; From 95e6e5aef789748de8e7263e3ef268feefff835c Mon Sep 17 00:00:00 2001 From: Vxider Date: Mon, 16 May 2022 08:41:32 +0000 Subject: [PATCH 164/615] fix window view dependency --- src/Storages/WindowView/StorageWindowView.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index ad216b79ba6..485115fab8a 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1070,7 +1070,6 @@ StorageWindowView::StorageWindowView( select_table_name = "one"; } select_table_id = StorageID(select_database_name, select_table_name); - DatabaseCatalog::instance().addDependency(select_table_id, table_id_); /// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID. auto inner_query = innerQueryParser(select_query->as()); @@ -1415,6 +1414,8 @@ void StorageWindowView::writeIntoWindowView( void StorageWindowView::startup() { + DatabaseCatalog::instance().addDependency(select_table_id, getStorageID()); + // Start the working thread clean_cache_task->activateAndSchedule(); fire_task->activateAndSchedule(); @@ -1449,8 +1450,6 @@ void StorageWindowView::checkTableCanBeDropped() const void StorageWindowView::drop() { - DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID()); - /// Must be guaranteed at this point for database engine Atomic that has_inner_table == false, /// because otherwise will be a deadlock. 
dropInnerTableIfAny(true, getContext()); From 31eee576a5728359a9ff417b39465f85c910e2fb Mon Sep 17 00:00:00 2001 From: Vxider Date: Mon, 16 May 2022 09:04:37 +0000 Subject: [PATCH 165/615] do not addDependency when table is dropped --- src/Storages/StorageMaterializedView.cpp | 11 ++++++++--- src/Storages/StorageMaterializedView.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 171ad0bd877..99cc8a284b8 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -127,9 +127,6 @@ StorageMaterializedView::StorageMaterializedView( target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->getDatabase(), manual_create_query->getTable()}, getContext())->getStorageID(); } - - if (!select.select_table_id.empty()) - DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( @@ -400,6 +397,14 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) DatabaseCatalog::instance().updateDependency(select_query.select_table_id, old_table_id, select_query.select_table_id, getStorageID()); } +void StorageMaterializedView::startup() +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + const auto & select_query = metadata_snapshot->getSelectQuery(); + if (!select_query.select_table_id.empty()) + DatabaseCatalog::instance().addDependency(select_query.select_table_id, getStorageID()); +} + void StorageMaterializedView::shutdown() { auto metadata_snapshot = getInMemoryMetadataPtr(); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 16817c930b2..001bf39f10f 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -69,6 +69,7 @@ public: void renameInMemory(const StorageID & new_table_id) override; + 
void startup() override; void shutdown() override; QueryProcessingStage::Enum From 191767d30058568e73f75fbb8fb565e2b0d613ee Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Fri, 13 May 2022 12:21:42 +0300 Subject: [PATCH 166/615] Reformat test file according to Check black --- .../test.py | 173 ++++++++++++------ 1 file changed, 118 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/test.py b/tests/integration/test_replicated_merge_tree_s3_restore/test.py index fbf595644f7..904bcfa4280 100644 --- a/tests/integration/test_replicated_merge_tree_s3_restore/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_restore/test.py @@ -16,26 +16,46 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node1z", - main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf.xml"], - macros={"cluster": "node_zero_copy", "replica": "0"}, - with_minio=True, with_zookeeper=True, stay_alive=True) - cluster.add_instance("node2z", - main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf.xml"], - macros={"cluster": "node_zero_copy", "replica": "1"}, - with_zookeeper=True, stay_alive=True) - cluster.add_instance("node1n", - main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf_without_zero_copy.xml"], - macros={"cluster": "node_no_zero_copy", "replica": "2"}, - with_minio=True, with_zookeeper=True, stay_alive=True) - cluster.add_instance("node2n", - main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf_without_zero_copy.xml"], - macros={"cluster": "node_no_zero_copy", "replica": "3"}, - with_zookeeper=True, stay_alive=True) - cluster.add_instance("node_another_bucket", - main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf_another_bucket.xml"], - macros={"cluster": "node_another_bucket", "replica": "0"}, - with_zookeeper=True, stay_alive=True) + cluster.add_instance( + "node1z", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf.xml"], + 
macros={"cluster": "node_zero_copy", "replica": "0"}, + with_minio=True, + with_zookeeper=True, + stay_alive=True, + ) + cluster.add_instance( + "node2z", + main_configs=COMMON_CONFIGS + ["configs/config.d/storage_conf.xml"], + macros={"cluster": "node_zero_copy", "replica": "1"}, + with_zookeeper=True, + stay_alive=True, + ) + cluster.add_instance( + "node1n", + main_configs=COMMON_CONFIGS + + ["configs/config.d/storage_conf_without_zero_copy.xml"], + macros={"cluster": "node_no_zero_copy", "replica": "2"}, + with_minio=True, + with_zookeeper=True, + stay_alive=True, + ) + cluster.add_instance( + "node2n", + main_configs=COMMON_CONFIGS + + ["configs/config.d/storage_conf_without_zero_copy.xml"], + macros={"cluster": "node_no_zero_copy", "replica": "3"}, + with_zookeeper=True, + stay_alive=True, + ) + cluster.add_instance( + "node_another_bucket", + main_configs=COMMON_CONFIGS + + ["configs/config.d/storage_conf_another_bucket.xml"], + macros={"cluster": "node_another_bucket", "replica": "0"}, + with_zookeeper=True, + stay_alive=True, + ) logging.info("Starting cluster...") cluster.start() @@ -48,12 +68,16 @@ def cluster(): def random_string(length): letters = string.ascii_letters - return ''.join(random.choice(letters) for i in range(length)) + return "".join(random.choice(letters) for i in range(length)) def create_table(node, table_name, schema, attach=False, db_atomic=False, uuid=""): - node.query("CREATE DATABASE IF NOT EXISTS s3 {on_cluster} ENGINE = {engine}".format(engine="Atomic" if db_atomic else "Ordinary", - on_cluster="ON CLUSTER '{cluster}'")) + node.query( + "CREATE DATABASE IF NOT EXISTS s3 {on_cluster} ENGINE = {engine}".format( + engine="Atomic" if db_atomic else "Ordinary", + on_cluster="ON CLUSTER '{cluster}'", + ) + ) create_table_statement = """ {create} TABLE s3.{table_name} {uuid} {on_cluster} ( @@ -66,12 +90,14 @@ def create_table(node, table_name, schema, attach=False, db_atomic=False, uuid=" storage_policy='s3', old_parts_lifetime=600, 
index_granularity=512 - """.format(create="ATTACH" if attach else "CREATE", - table_name=table_name, - uuid="UUID '{uuid}'".format(uuid=uuid) if db_atomic and uuid else "", - on_cluster="ON CLUSTER '{cluster}'", - schema=schema, - engine="ReplicatedMergeTree('/clickhouse/tables/{cluster}/test', '{replica}')") + """.format( + create="ATTACH" if attach else "CREATE", + table_name=table_name, + uuid="UUID '{uuid}'".format(uuid=uuid) if db_atomic and uuid else "", + on_cluster="ON CLUSTER '{cluster}'", + schema=schema, + engine="ReplicatedMergeTree('/clickhouse/tables/{cluster}/test', '{replica}')", + ) node.query(create_table_statement) @@ -85,37 +111,68 @@ def purge_s3(cluster, bucket): def drop_s3_metadata(node): - node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/disks/s3/*'], user='root') + node.exec_in_container( + ["bash", "-c", "rm -rf /var/lib/clickhouse/disks/s3/*"], user="root" + ) def drop_shadow_information(node): - node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/shadow/*'], user='root') + node.exec_in_container( + ["bash", "-c", "rm -rf /var/lib/clickhouse/shadow/*"], user="root" + ) def create_restore_file(node, revision=None, bucket=None, path=None, detached=None): - node.exec_in_container(['bash', '-c', 'mkdir -p /var/lib/clickhouse/disks/s3/'], user='root') - node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/disks/s3/restore'], user='root') + node.exec_in_container( + ["bash", "-c", "mkdir -p /var/lib/clickhouse/disks/s3/"], user="root" + ) + node.exec_in_container( + ["bash", "-c", "touch /var/lib/clickhouse/disks/s3/restore"], user="root" + ) add_restore_option = 'echo -en "{}={}\n" >> /var/lib/clickhouse/disks/s3/restore' if revision: - node.exec_in_container(['bash', '-c', add_restore_option.format('revision', revision)], user='root') + node.exec_in_container( + ["bash", "-c", add_restore_option.format("revision", revision)], user="root" + ) if bucket: - node.exec_in_container(['bash', '-c', 
add_restore_option.format('source_bucket', bucket)], user='root') + node.exec_in_container( + ["bash", "-c", add_restore_option.format("source_bucket", bucket)], + user="root", + ) if path: - node.exec_in_container(['bash', '-c', add_restore_option.format('source_path', path)], user='root') + node.exec_in_container( + ["bash", "-c", add_restore_option.format("source_path", path)], user="root" + ) if detached: - node.exec_in_container(['bash', '-c', add_restore_option.format('detached', 'true')], user='root') + node.exec_in_container( + ["bash", "-c", add_restore_option.format("detached", "true")], user="root" + ) def get_revision_counter(node, backup_number): - return int(node.exec_in_container( - ['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root')) + return int( + node.exec_in_container( + [ + "bash", + "-c", + "cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt".format( + backup_number + ), + ], + user="root", + ) + ) def get_table_uuid(node, db_atomic, table): uuid = "" if db_atomic: - uuid = node.query("SELECT uuid FROM system.tables WHERE database='s3' AND table='{}' FORMAT TabSeparated".format(table)).strip() + uuid = node.query( + "SELECT uuid FROM system.tables WHERE database='s3' AND table='{}' FORMAT TabSeparated".format( + table + ) + ).strip() return uuid @@ -138,12 +195,8 @@ def drop_table(cluster): purge_s3(cluster, bucket) -@pytest.mark.parametrize( - "db_atomic", [False, True] -) -@pytest.mark.parametrize( - "zero_copy", [False, True] -) +@pytest.mark.parametrize("db_atomic", [False, True]) +@pytest.mark.parametrize("zero_copy", [False, True]) def test_restore_another_bucket_path(cluster, db_atomic, zero_copy): suffix = "z" if zero_copy else "n" nodes = [cluster.instances[f"node1{suffix}"], cluster.instances[f"node2{suffix}"]] @@ -160,18 +213,21 @@ def test_restore_another_bucket_path(cluster, db_atomic, zero_copy): create_table(nodes[0], "test", schema, db_atomic=db_atomic) uuid = 
get_table_uuid(nodes[0], db_atomic, "test") + dropped_keys = 0 - dropped_keys = 0 - for key in range(0, keys): node = nodes[key % 2] - node.query("INSERT INTO s3.test SELECT {key}, * FROM generateRandom('{schema}') LIMIT {size}".format(key=key, schema=schema, size=size)) + node.query( + "INSERT INTO s3.test SELECT {key}, * FROM generateRandom('{schema}') LIMIT {size}".format( + key=key, schema=schema, size=size + ) + ) if not (key % 3): dropped_keys += 1 node.query("ALTER TABLE s3.test DROP PARTITION '{key}'".format(key=key)) for key in range(0, keys): - if not ((key+1) % 3): + if not ((key + 1) % 3): dropped_keys += 1 node.query("ALTER TABLE s3.test DROP PARTITION '{key}'".format(key=key)) @@ -182,14 +238,21 @@ def test_restore_another_bucket_path(cluster, db_atomic, zero_copy): nodes[0].query("OPTIMIZE TABLE s3.test") nodes[1].query("OPTIMIZE TABLE s3.test") - assert nodes[0].query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(size * (keys - dropped_keys)) - assert nodes[1].query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(size * (keys - dropped_keys)) + assert nodes[0].query( + "SELECT count(*) FROM s3.test FORMAT Values" + ) == "({})".format(size * (keys - dropped_keys)) + assert nodes[1].query( + "SELECT count(*) FROM s3.test FORMAT Values" + ) == "({})".format(size * (keys - dropped_keys)) node_another_bucket = cluster.instances["node_another_bucket"] create_restore_file(node_another_bucket, bucket="root") node_another_bucket.query("SYSTEM RESTART DISK s3") - create_table(node_another_bucket, "test", schema, attach=True, db_atomic=db_atomic, uuid=uuid) - - assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(size * (keys - dropped_keys)) + create_table( + node_another_bucket, "test", schema, attach=True, db_atomic=db_atomic, uuid=uuid + ) + assert node_another_bucket.query( + "SELECT count(*) FROM s3.test FORMAT Values" + ) == "({})".format(size * (keys - dropped_keys)) From 
adb8ac4fdaf5438ea37c50d17d6e38db6ef8ed09 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 16 May 2022 13:15:31 +0000 Subject: [PATCH 167/615] Change log level --- src/Coordination/KeeperStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 86be0666fc0..1a7e3743948 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -290,7 +290,7 @@ namespace [[noreturn]] void onStorageInconsistency() { - LOG_INFO(&Poco::Logger::get("KeeperStorage"), "Inconsistency found between uncommitted and committed data. Keeper will terminate to avoid undefined behaviour."); + LOG_ERROR(&Poco::Logger::get("KeeperStorage"), "Inconsistency found between uncommitted and committed data. Keeper will terminate to avoid undefined behaviour."); std::terminate(); } From 8f271159bff02eeb11f8d38e94ed06de9f97909d Mon Sep 17 00:00:00 2001 From: Vxider Date: Mon, 16 May 2022 13:59:55 +0000 Subject: [PATCH 168/615] check null pointer --- src/Storages/WindowView/StorageWindowView.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index bb5e78fab9a..3321d7c9379 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -643,6 +643,9 @@ inline void StorageWindowView::fire(UInt32 watermark) tryLogCurrentException(__PRETTY_FUNCTION__); } + if (!blocks || blocks->empty()) + return; + for (const auto & block : *blocks) { for (auto & watch_stream : watch_streams) From 2d4b4b900880571844d60d29ade8be670c5b40a7 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 16 May 2022 14:19:44 +0000 Subject: [PATCH 169/615] Fix inserting defaults for missing values in columnar formats --- src/Storages/ColumnsDescription.cpp | 21 ++++++++++++ src/Storages/ColumnsDescription.h | 3 ++ src/Storages/HDFS/StorageHDFS.cpp | 3 +- 
src/Storages/StorageFile.cpp | 3 +- src/Storages/StorageS3.cpp | 3 +- src/Storages/StorageSnapshot.cpp | 33 +++++++++++++++++++ src/Storages/StorageSnapshot.h | 2 ++ src/Storages/StorageURL.cpp | 3 +- ...302_defaults_in_columnar_formats.reference | 3 ++ .../02302_defaults_in_columnar_formats.sql | 6 ++++ 10 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference create mode 100644 tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3aa5b28fed5..e11c2477572 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -572,6 +572,27 @@ std::optional ColumnsDescription::tryGetColumnOrSubcolumn(GetCo return tryGetColumn(GetColumnsOptions(kind).withSubcolumns(), column_name); } +std::optional ColumnsDescription::tryGetColumnDescription(const GetColumnsOptions & options, const String & column_name) const +{ + auto it = columns.get<1>().find(column_name); + if (it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & options.kind)) + return *it; + + if (options.with_subcolumns) + { + auto jt = subcolumns.get<0>().find(column_name); + if (jt != subcolumns.get<0>().end()) + return ColumnDescription{jt->name, jt->type}; + } + + return {}; +} + +std::optional ColumnsDescription::tryGetColumnOrSubcolumnDescription(GetColumnsOptions::Kind kind, const String & column_name) const +{ + return tryGetColumnDescription(GetColumnsOptions(kind).withSubcolumns(), column_name); +} + NameAndTypePair ColumnsDescription::getColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const { auto column = tryGetColumnOrSubcolumn(kind, column_name); diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 75db8b92545..a3921d254b2 100644 --- a/src/Storages/ColumnsDescription.h +++ 
b/src/Storages/ColumnsDescription.h @@ -180,6 +180,9 @@ public: std::optional tryGetColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const; std::optional tryGetColumn(const GetColumnsOptions & options, const String & column_name) const; + std::optional tryGetColumnOrSubcolumnDescription(GetColumnsOptions::Kind kind, const String & column_name) const; + std::optional tryGetColumnDescription(const GetColumnsOptions & options, const String & column_name) const; + ColumnDefaults getDefaults() const; /// TODO: remove bool hasDefault(const String & column_name) const; bool hasDefaults() const; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0176487bbfe..d114bb67016 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -539,8 +539,7 @@ Pipe StorageHDFS::read( if (fetch_columns.empty()) fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5b191b37f5e..47e32337dfe 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -691,8 +691,7 @@ Pipe StorageFile::read( const auto get_columns_for_format = [&]() -> ColumnsDescription { if (isColumnOriented()) - return ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + return storage_snapshot->getDescriptionForColumns(column_names); else return storage_snapshot->metadata->getColumns(); }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6107c1a5117..d402dce5ede 100644 
--- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -719,8 +719,7 @@ Pipe StorageS3::read( if (fetch_columns.empty()) fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index a4b64c798f3..8dd2a52b647 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -124,6 +124,39 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) cons return res; } +ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & column_names) const +{ + ColumnsDescription res; + const auto & columns = getMetadataForQuery()->getColumns(); + for (const auto & name : column_names) + { + auto column = columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + if (column && !object_column) + { + res.add(*column); + } + else if (object_column) + { + res.add({object_column->name, object_column->type}); + } + else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + { + /// Virtual columns must be appended after ordinary, because user can + /// override them. 
+ const auto & type = it->second; + res.add({name, type}); + } + else + { + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); + } + } + + return res; +} + namespace { using DenseHashSet = google::dense_hash_set; diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index 909f4fd5cab..5b76a4b37e5 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -68,6 +68,8 @@ struct StorageSnapshot /// Block with ordinary + materialized + aliases + virtuals + subcolumns. Block getSampleBlockForColumns(const Names & column_names) const; + ColumnsDescription getDescriptionForColumns(const Names & column_names) const; + /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. void check(const Names & column_names) const; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 03bd1d5e7d9..e9814e519b7 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -602,8 +602,7 @@ Pipe IStorageURLBase::read( Block block_for_format; if (isColumnOriented()) { - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(column_names); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference new file mode 100644 index 00000000000..9de3c47b3b2 --- /dev/null +++ b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference @@ -0,0 +1,3 @@ +1 42 43 +1 42 43 +1 42 43 diff --git a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql 
b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql new file mode 100644 index 00000000000..46dedf12253 --- /dev/null +++ b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql @@ -0,0 +1,6 @@ +insert into function file(data_02302.parquet) select 1 as x settings engine_file_truncate_on_insert=1; +select * from file(data_02302.parquet, auto, 'x UInt8, y default 42, z default x + y') settings input_format_parquet_allow_missing_columns=1; +insert into function file(data_02302.orc) select 1 as x settings engine_file_truncate_on_insert=1; +select * from file(data_02302.orc, auto, 'x UInt8, y default 42, z default x + y') settings input_format_orc_allow_missing_columns=1; +insert into function file(data_02302.arrow) select 1 as x settings engine_file_truncate_on_insert=1; +select * from file(data_02302.arrow, auto, 'x UInt8, y default 42, z default x + y') settings input_format_arrow_allow_missing_columns=1; From ce834b10866b1447b6fc52f46ebbf36915b6b0d3 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 16 May 2022 14:40:25 +0000 Subject: [PATCH 170/615] Remove code duplication --- src/Storages/ColumnsDescription.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index e11c2477572..a7af2433875 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -543,16 +543,9 @@ Names ColumnsDescription::getNamesOfPhysical() const std::optional ColumnsDescription::tryGetColumn(const GetColumnsOptions & options, const String & column_name) const { - auto it = columns.get<1>().find(column_name); - if (it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & options.kind)) - return NameAndTypePair(it->name, it->type); - - if (options.with_subcolumns) - { - auto jt = subcolumns.get<0>().find(column_name); - if (jt != subcolumns.get<0>().end()) - return *jt; - } + auto column_description = 
tryGetColumnDescription(options, column_name); + if (column_description) + return NameAndTypePair{column_description->name, column_description->type}; return {}; } From 8572879c3786abbbb5a5b2e7d8782b9cac7eb897 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 16 May 2022 17:58:20 +0200 Subject: [PATCH 171/615] Remove redundant code --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 7449dc75c8b..c792d828e44 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -36,8 +36,6 @@ #include #include -#include - /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. #define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ @@ -475,7 +473,6 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_indexes_column = std::make_shared(indexes_array); auto indexes_column = readColumnWithIndexesData(arrow_indexes_column); - LOG_DEBUG(&Poco::Logger::get("Arrow"), "Indexes types: {} {}", arrow_indexes_column->type()->name(), indexes_column->getName()); auto lc_column = ColumnLowCardinality::create(dict_values->column, indexes_column); auto lc_type = std::make_shared(dict_values->type); return {std::move(lc_column), std::move(lc_type), column_name}; From d7dcb1f5d965f33baf032739dbe5e9ab814b8617 Mon Sep 17 00:00:00 2001 From: Vxider Date: Mon, 16 May 2022 17:49:14 +0000 Subject: [PATCH 172/615] update test --- tests/queries/0_stateless/01082_window_view_watch_limit.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 99808bc2cf7..0ae9e9d7309 100755 --- 
a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -45,6 +45,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01082_window_view_watch_limit.wv LIMIT 1") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, '1990/01/01 12:00:00');" ) From ceb7249916c9dfbca971b7367a9504bfa86bd6e5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 May 2022 19:57:48 +0200 Subject: [PATCH 173/615] Fix stupid buge --- src/IO/S3/PocoHTTPClient.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index d689d6b279c..cb8c91990a0 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -311,6 +311,11 @@ void PocoHTTPClient::makeRequestInternal( } LOG_TEST(log, "Received headers: {}", headers_ss.str()); } + else + { + for (const auto & [header_name, header_value] : poco_response) + response->AddHeader(header_name, header_value); + } if (status_code == 429 || status_code == 503) { // API throttling From 2ae9e2224800777de8f893e733ab7df6e3986674 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Mon, 16 May 2022 16:22:13 -0400 Subject: [PATCH 174/615] Improve CompressedWriteBuffer to avoid unnecessary memcpy --- src/Compression/CompressedWriteBuffer.cpp | 25 ++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index b73e4223f7d..52a7ea3c79e 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -22,14 +22,29 @@ void CompressedWriteBuffer::nextImpl() if (!offset()) return; + UInt32 compressed_size = 0; size_t decompressed_size = offset(); UInt32 compressed_reserve_size = codec->getCompressedReserveSize(decompressed_size); - 
compressed_buffer.resize(compressed_reserve_size); - UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); + + if(out.available() > (compressed_reserve_size + CHECKSUM_SIZE)) + { + char *out_checksum_ptr = out.position(); + char *out_compressed_ptr = out.position() + CHECKSUM_SIZE; + compressed_size = codec->compress(working_buffer.begin(), decompressed_size, out_compressed_ptr); - CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); - out.write(reinterpret_cast(&checksum), CHECKSUM_SIZE); - out.write(compressed_buffer.data(), compressed_size); + CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size); + memcpy(out_checksum_ptr, &checksum, CHECKSUM_SIZE); + out.position() += CHECKSUM_SIZE + compressed_size; + } + else + { + compressed_buffer.resize(compressed_reserve_size); + compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); + + CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); + out.write(reinterpret_cast(&checksum), CHECKSUM_SIZE); + out.write(compressed_buffer.data(), compressed_size); + } } CompressedWriteBuffer::~CompressedWriteBuffer() From f334cd371f58f30f98e5b3e46ba92a545809b811 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Mon, 16 May 2022 17:21:20 -0400 Subject: [PATCH 175/615] fixed code style issue --- src/Compression/CompressedWriteBuffer.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 52a7ea3c79e..5d5d3a7187c 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -1,11 +1,11 @@ #include #include -#include #include +#include -#include "CompressedWriteBuffer.h" #include +#include "CompressedWriteBuffer.h" 
namespace DB @@ -25,11 +25,11 @@ void CompressedWriteBuffer::nextImpl() UInt32 compressed_size = 0; size_t decompressed_size = offset(); UInt32 compressed_reserve_size = codec->getCompressedReserveSize(decompressed_size); - - if(out.available() > (compressed_reserve_size + CHECKSUM_SIZE)) + + if (out.available() > compressed_reserve_size + CHECKSUM_SIZE) { - char *out_checksum_ptr = out.position(); - char *out_compressed_ptr = out.position() + CHECKSUM_SIZE; + char * out_checksum_ptr = out.position(); + char * out_compressed_ptr = out.position() + CHECKSUM_SIZE; compressed_size = codec->compress(working_buffer.begin(), decompressed_size, out_compressed_ptr); CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size); @@ -52,10 +52,7 @@ CompressedWriteBuffer::~CompressedWriteBuffer() finalize(); } -CompressedWriteBuffer::CompressedWriteBuffer( - WriteBuffer & out_, - CompressionCodecPtr codec_, - size_t buf_size) +CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size) : BufferWithOwnMemory(buf_size), out(out_), codec(std::move(codec_)) { } From fd28c19c1cb629b88c6d15f530f3d1e8473bf120 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 16 May 2022 20:44:22 +0200 Subject: [PATCH 176/615] Update CompressedWriteBuffer.cpp --- src/Compression/CompressedWriteBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 5d5d3a7187c..af10b34c771 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -33,7 +33,7 @@ void CompressedWriteBuffer::nextImpl() compressed_size = codec->compress(working_buffer.begin(), decompressed_size, out_compressed_ptr); CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size); - memcpy(out_checksum_ptr, &checksum, CHECKSUM_SIZE); + 
memcpy(out_checksum_ptr, reinterpret_cast(&checksum), CHECKSUM_SIZE); out.position() += CHECKSUM_SIZE + compressed_size; } else From 43945cea1bf3650eeda1351833d17dccb1c8a62b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 16 May 2022 20:59:27 +0200 Subject: [PATCH 177/615] Fixing some warnings --- base/base/ReplxxLineReader.cpp | 2 +- .../static-files-disk-uploader.cpp | 2 +- .../AggregateFunctionMannWhitney.h | 2 +- .../AggregateFunctionMeanZTest.h | 2 +- .../AggregateFunctionRankCorrelation.h | 2 +- src/AggregateFunctions/AggregateFunctionTTest.h | 2 +- .../Config/AbstractConfigurationComparison.cpp | 2 +- src/Common/Config/configReadClient.cpp | 2 +- src/Common/Exception.h | 14 +++++++------- src/Common/NetException.h | 4 ++-- src/Common/SensitiveDataMasker.h | 2 +- src/Common/ZooKeeper/IKeeper.h | 4 ++-- src/Core/Block.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 6 +++--- src/Daemon/SentryWriter.h | 2 +- src/Dictionaries/DictionaryStructure.h | 2 +- src/Functions/addSubSeconds.cpp | 6 +++--- src/Interpreters/Context.cpp | 2 +- src/Interpreters/FilesystemCacheLog.cpp | 2 +- src/Interpreters/FilesystemCacheLog.h | 2 +- src/Interpreters/HashJoin.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/UserDefinedExecutableFunction.cpp | 2 +- src/Interpreters/ZooKeeperLog.cpp | 2 +- src/Loggers/Loggers.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexMinMax.cpp | 2 +- src/Storages/MergeTree/RPNBuilder.h | 2 +- src/Storages/StorageExecutable.cpp | 2 +- 28 files changed, 40 insertions(+), 40 deletions(-) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 9ea53bb132b..0569567d4f8 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -378,4 +378,4 @@ void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; rx.enable_bracketed_paste(); -}; +} diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp 
b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp index 20307c0ccd3..a10c25c3342 100644 --- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -71,7 +71,7 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes dst_buf->next(); dst_buf->finalize(); } -}; +} void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_mode, bool link) diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index 887769dfbf5..089f70cd26b 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -245,4 +245,4 @@ public: }; -}; +} diff --git a/src/AggregateFunctions/AggregateFunctionMeanZTest.h b/src/AggregateFunctions/AggregateFunctionMeanZTest.h index e4be2503d87..7fecff591e6 100644 --- a/src/AggregateFunctions/AggregateFunctionMeanZTest.h +++ b/src/AggregateFunctions/AggregateFunctionMeanZTest.h @@ -136,4 +136,4 @@ public: } }; -}; +} diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index 733416d4721..a9bf8254f35 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -102,4 +102,4 @@ public: }; -}; +} diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h index 4c939121a72..7ef5cfce9c9 100644 --- a/src/AggregateFunctions/AggregateFunctionTTest.h +++ b/src/AggregateFunctions/AggregateFunctionTTest.h @@ -234,4 +234,4 @@ public: } }; -}; +} diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index ea0b3be4b98..711c754743d 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ 
b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -18,7 +18,7 @@ namespace result += '.'; result += subkey; return result; - }; + } } diff --git a/src/Common/Config/configReadClient.cpp b/src/Common/Config/configReadClient.cpp index e7bc0b72814..e5308bc3bc7 100644 --- a/src/Common/Config/configReadClient.cpp +++ b/src/Common/Config/configReadClient.cpp @@ -14,7 +14,7 @@ bool safeFsExists(const String & path) { std::error_code ec; return fs::exists(path, ec); -}; +} bool configReadClient(Poco::Util::LayeredConfiguration & config, const std::string & home_path) { diff --git a/src/Common/Exception.h b/src/Common/Exception.h index b2fc369237e..086b64bf5f9 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -48,8 +48,8 @@ public: Exception * clone() const override { return new Exception(*this); } void rethrow() const override { throw *this; } - const char * name() const throw() override { return "DB::Exception"; } - const char * what() const throw() override { return message().data(); } + const char * name() const noexcept override { return "DB::Exception"; } + const char * what() const noexcept override { return message().data(); } /// Add something to the existing message. 
template @@ -77,7 +77,7 @@ private: #endif bool remote = false; - const char * className() const throw() override { return "DB::Exception"; } + const char * className() const noexcept override { return "DB::Exception"; } }; @@ -102,8 +102,8 @@ private: int saved_errno; std::optional path; - const char * name() const throw() override { return "DB::ErrnoException"; } - const char * className() const throw() override { return "DB::ErrnoException"; } + const char * name() const noexcept override { return "DB::ErrnoException"; } + const char * className() const noexcept override { return "DB::ErrnoException"; } }; @@ -143,8 +143,8 @@ private: String file_name; mutable std::string formatted_message; - const char * name() const throw() override { return "DB::ParsingException"; } - const char * className() const throw() override { return "DB::ParsingException"; } + const char * name() const noexcept override { return "DB::ParsingException"; } + const char * className() const noexcept override { return "DB::ParsingException"; } }; diff --git a/src/Common/NetException.h b/src/Common/NetException.h index 019a12f23b9..712893ed83b 100644 --- a/src/Common/NetException.h +++ b/src/Common/NetException.h @@ -22,8 +22,8 @@ public: void rethrow() const override { throw *this; } private: - const char * name() const throw() override { return "DB::NetException"; } - const char * className() const throw() override { return "DB::NetException"; } + const char * name() const noexcept override { return "DB::NetException"; } + const char * className() const noexcept override { return "DB::NetException"; } }; } diff --git a/src/Common/SensitiveDataMasker.h b/src/Common/SensitiveDataMasker.h index edd9f10ca91..adb6f5d51e1 100644 --- a/src/Common/SensitiveDataMasker.h +++ b/src/Common/SensitiveDataMasker.h @@ -69,4 +69,4 @@ public: size_t rulesCount() const; }; -}; +} diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 74b45d411b0..1e79468b7e3 100644 --- 
a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -401,8 +401,8 @@ public: Exception(const Error code_, const std::string & path); /// NOLINT Exception(const Exception & exc); - const char * name() const throw() override { return "Coordination::Exception"; } - const char * className() const throw() override { return "Coordination::Exception"; } + const char * name() const noexcept override { return "Coordination::Exception"; } + const char * className() const noexcept override { return "Coordination::Exception"; } Exception * clone() const override { return new Exception(*this); } const Error code; diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index a21c96abfdb..33f5095d385 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -35,7 +35,7 @@ static ReturnType onError(const std::string & message [[maybe_unused]], int code throw Exception(message, code); else return false; -}; +} template diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index a89e45dde59..6364b0a8f48 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -451,7 +451,7 @@ static std::string createDirectory(const std::string & file) return ""; fs::create_directories(path); return path; -}; +} static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path) @@ -794,7 +794,7 @@ static void addSignalHandler(const std::vector & signals, signal_function h if (out_handled_signals) std::copy(signals.begin(), signals.end(), std::back_inserter(*out_handled_signals)); -}; +} static void blockSignals(const std::vector & signals) @@ -816,7 +816,7 @@ static void blockSignals(const std::vector & signals) if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) throw Poco::Exception("Cannot block signal."); -}; +} void BaseDaemon::initializeTerminationAndSignalProcessing() diff --git a/src/Daemon/SentryWriter.h b/src/Daemon/SentryWriter.h index 0888b2fe9a3..32aeff2787a 100644 --- a/src/Daemon/SentryWriter.h +++ 
b/src/Daemon/SentryWriter.h @@ -24,4 +24,4 @@ namespace SentryWriter int sig, const std::string & error_message, const StackTrace & stack_trace); -}; +} diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 9014b09b072..50cfba01894 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -89,7 +89,7 @@ constexpr void callOnDictionaryAttributeType(AttributeUnderlyingType type, F && if (type == other) func(DictionaryAttributeType{}); }); -}; +} struct DictionarySpecialAttribute final { diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp index f58f8b20b99..cb5ffce61e3 100644 --- a/src/Functions/addSubSeconds.cpp +++ b/src/Functions/addSubSeconds.cpp @@ -9,19 +9,19 @@ using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval(); -}; +} using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionAddMicroseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionAddMilliseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 34f396b978c..4f951d69349 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1378,7 +1378,7 @@ void Context::killCurrentQuery() { process_list_elem->cancelQuery(true); } -}; +} String Context::getDefaultFormat() const { diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 609305321b1..f03472c45e0 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -75,4 +75,4 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(read_buffer_id); } -}; +} diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 
77bae7d788a..a3624867aec 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -58,4 +58,4 @@ class FilesystemCacheLog : public SystemLog using SystemLog::SystemLog; }; -}; +} diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 138d10efb35..ec260ce717e 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1176,7 +1176,7 @@ void addFoundRowAll( ++current_offset; } } -}; +} template void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 9db61bcfc9d..f7bd9000278 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -203,7 +203,7 @@ static bool isTrivialSelect(const ASTPtr & select) } /// This query is ASTSelectWithUnionQuery subquery return false; -}; +} Chain InterpreterInsertQuery::buildChain( const StoragePtr & table, diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Interpreters/UserDefinedExecutableFunction.cpp index e5a852b0e75..477a1b10f3c 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.cpp +++ b/src/Interpreters/UserDefinedExecutableFunction.cpp @@ -21,4 +21,4 @@ UserDefinedExecutableFunction::UserDefinedExecutableFunction( { } -}; +} diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 2828c3e2fa8..6394b1d5429 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -209,4 +209,4 @@ void ZooKeeperLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(children_array); } -}; +} diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 512e44f79c7..70205998bb5 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -32,7 +32,7 @@ static std::string createDirectory(const std::string & file) return 
""; fs::create_directories(path); return path; -}; +} #ifdef WITH_TEXT_LOG void Loggers::setTextLog(std::shared_ptr log, int max_priority) diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 16d11fb7e33..b257a1db090 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -196,7 +196,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { return std::make_shared(index, query, context); -}; +} bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const { diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index 183808c9290..27b616dc301 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -120,4 +120,4 @@ private: }; -}; +} diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 4a4317c9aab..9638e5186f9 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -234,5 +234,5 @@ void registerStorageExecutable(StorageFactory & factory) }, storage_features); } -}; +} From 466a02ba8fab99b308523a30bdb5de8057528f67 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 May 2022 21:31:40 +0200 Subject: [PATCH 178/615] Follow HDFS --- src/Disks/DiskObjectStorage.cpp | 74 +++++++++++++- src/Disks/DiskObjectStorage.h | 13 +-- src/Disks/HDFSObjectStorage.cpp | 164 ++++++++++++++++++++++++++++++++ src/Disks/HDFSObjectStorage.h | 120 +++++++++++++++++++++++ src/Disks/IDisk.h | 9 +- src/Disks/IDiskObjectStorage.h | 8 ++ src/Disks/IObjectStorage.h | 2 + src/Disks/S3/diskSettings.cpp | 1 - src/Disks/S3/registerDiskS3.cpp | 8 +- src/Disks/S3ObjectStorage.cpp | 1 + src/Disks/S3ObjectStorage.h | 6 +- src/Disks/WriteMode.h | 15 +++ 12 files changed, 394 insertions(+), 27 deletions(-) create mode 100644 
src/Disks/HDFSObjectStorage.cpp create mode 100644 src/Disks/HDFSObjectStorage.h create mode 100644 src/Disks/IDiskObjectStorage.h create mode 100644 src/Disks/WriteMode.h diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index b2d2bf23652..b0679051eec 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -37,6 +37,74 @@ static String revisionToString(UInt64 revision) return std::bitset<64>(revision).to_string(); } +namespace +{ + +/// Runs tasks asynchronously using thread pool. +class AsyncThreadPoolExecutor : public Executor +{ +public: + AsyncThreadPoolExecutor(const String & name_, int thread_pool_size) + : name(name_) + , pool(ThreadPool(thread_pool_size)) {} + + std::future execute(std::function task) override + { + auto promise = std::make_shared>(); + pool.scheduleOrThrowOnError( + [promise, task]() + { + try + { + task(); + promise->set_value(); + } + catch (...) + { + tryLogCurrentException("Failed to run async task"); + + try + { + promise->set_exception(std::current_exception()); + } + catch (...) 
{} + } + }); + + return promise->get_future(); + } + + void setMaxThreads(size_t threads) + { + pool.setMaxThreads(threads); + } + +private: + String name; + ThreadPool pool; +}; + +} + +DiskObjectStorage::DiskObjectStorage( + const String & name_, + const String & remote_fs_root_path_, + const String & log_name, + DiskPtr metadata_disk_, + ObjectStoragePtr && object_storage_, + DiskType disk_type_, + bool send_metadata_, + uint64_t thread_pool_size) + : IDisk(std::make_unique(log_name, thread_pool_size)) + , name(name_) + , remote_fs_root_path(remote_fs_root_path_) + , log (&Poco::Logger::get(log_name)) + , metadata_disk(metadata_disk_) + , disk_type(disk_type_) + , object_storage(std::move(object_storage_)) + , send_metadata(send_metadata_) + , metadata_helper(std::make_unique(this, ReadSettings{})) +{} DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) { @@ -715,7 +783,11 @@ std::unique_ptr DiskObjectStorage::writeFile( void DiskObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) { - object_storage->applyNewSettings(config, "storage_configuration.disks." + name, context_); + const auto config_prefix = "storage_configuration.disks." 
+ name; + object_storage->applyNewSettings(config, config_prefix, context_); + + if (AsyncThreadPoolExecutor * exec = dynamic_cast(&getExecutor())) + exec->setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16)); } void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index a67cc78b82b..f1687fe19b6 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -14,6 +14,7 @@ namespace DB class DiskObjectStorageMetadataHelper; + class DiskObjectStorage : public IDisk { @@ -28,16 +29,8 @@ public: DiskPtr metadata_disk_, ObjectStoragePtr && object_storage_, DiskType disk_type_, - bool send_metadata_) - : name(name_) - , remote_fs_root_path(remote_fs_root_path_) - , log (&Poco::Logger::get(log_name)) - , metadata_disk(metadata_disk_) - , disk_type(disk_type_) - , object_storage(std::move(object_storage_)) - , send_metadata(send_metadata_) - , metadata_helper(std::make_unique(this, ReadSettings{})) - {} + bool send_metadata_, + uint64_t thread_pool_size); DiskType getType() const override { return disk_type; } diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp new file mode 100644 index 00000000000..646ec6c8bd4 --- /dev/null +++ b/src/Disks/HDFSObjectStorage.cpp @@ -0,0 +1,164 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#if USE_HDFS + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; +} + +void HDFSObjectStorage::shutdown() +{ +} + +void HDFSObjectStorage::startup() +{ +} + +bool HDFSObjectStorage::exists(const std::string & hdfs_uri) const +{ + const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); + const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); + return (0 == 
hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); + +} + +std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto buf = std::make_unique(path, path, config, read_settings.remote_fs_buffer_size); + + return std::make_unique(std::move(buf), settings->min_bytes_for_seek); +} + +std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto hdfs_impl = std::make_unique(config, common_path_prefix, common_path_prefix, blobs_to_read, read_settings); + auto buf = std::make_unique(std::move(hdfs_impl)); + return std::make_unique(std::move(buf), settings->min_bytes_for_seek); +} + + /// Open the file for write and return WriteBufferFromFileBase object. +std::unique_ptr HDFSObjectStorage::writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional attributes, + FinalizeCallback && finalize_callback, + size_t buf_size, + const WriteSettings &) +{ + if (attributes.has_value()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + + /// Single O_WRONLY in libhdfs adds O_TRUNC + auto hdfs_buffer = std::make_unique(path, + config, settings->replication, buf_size, + mode == WriteMode::Rewrite ? 
O_WRONLY : O_WRONLY | O_APPEND); + + return std::make_unique(std::move(hdfs_buffer), std::move(finalize_callback), path); +} + + +void HDFSObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +{ + const size_t begin_of_path = path.find('/', path.find("//") + 2); + int32_t num_entries; + auto * files_list = hdfsListDirectory(hdfs_fs.get(), path.substr(begin_of_path).c_str(), &num_entries); + if (num_entries == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + + for (int32_t i = 0; i < num_entries; ++i) + children.emplace_back(files_list[i].mName, files_list[i].mSize); +} + +/// Remove file. Throws exception if file doesn't exists or it's a directory. +void HDFSObjectStorage::removeObject(const std::string & path) +{ + const size_t begin_of_path = path.find('/', path.find("//") + 2); + + /// Add path from root to file name + int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0); + if (res == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + +} + +void HDFSObjectStorage::removeObjects(const std::vector & paths) +{ + for (const auto & hdfs_path : paths) + { + const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); + + /// Add path from root to file name + int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); + if (res == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + } +} + +void HDFSObjectStorage::removeObjectIfExists(const std::string & path) +{ + if (exists(path)) + removeObject(path); +} + +void HDFSObjectStorage::removeObjectsIfExist(const std::vector & paths) +{ + for (const auto & hdfs_path : paths) + { + if (!exists(hdfs_path)) + continue; + + const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); + + /// Add path from root to file name + int res = hdfsDelete(hdfs_fs.get(), 
hdfs_path.substr(begin_of_path).c_str(), 0); + if (res == -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + } +} + +ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const +{ + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); +} + +void HDFSObjectStorage::copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes) +{ + if (object_to_attributes.has_value()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + + auto in = readObject(object_from); + auto out = writeObject(object_to); + copyData(*in, *out); + out->finalize(); +} + + +} + +#endif diff --git a/src/Disks/HDFSObjectStorage.h b/src/Disks/HDFSObjectStorage.h new file mode 100644 index 00000000000..397741d2c4a --- /dev/null +++ b/src/Disks/HDFSObjectStorage.h @@ -0,0 +1,120 @@ +#pragma once +#include + + +#if USE_HDFS + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +struct HDFSObjectStorageSettings +{ + + HDFSObjectStorageSettings() = default; + + size_t min_bytes_for_seek; + int objects_chunk_size_to_delete; + int replication; + + HDFSObjectStorageSettings( + int min_bytes_for_seek_, + int objects_chunk_size_to_delete_, + int replication_) + : min_bytes_for_seek(min_bytes_for_seek_) + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + , replication(replication_) + {} +}; + + +class HDFSObjectStorage : public IObjectStorage +{ +public: + + using SettingsPtr = std::unique_ptr; + + HDFSObjectStorage( + FileCachePtr && cache_, + const String & hdfs_root_path_, + SettingsPtr settings_, + const Poco::Util::AbstractConfiguration & config_) + : IObjectStorage(std::move(cache_)) + , config(config_) + , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) + , 
hdfs_fs(createHDFSFS(hdfs_builder.get())) + , settings(std::move(settings_)) + {} + + bool exists(const std::string & hdfs_uri) const override; + + std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + /// Open the file for write and return WriteBufferFromFileBase object. + std::unique_ptr writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) override; + + void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. 
+ void removeObject(const std::string & path) override; + + void removeObjects(const std::vector & paths) override; + + void removeObjectIfExists(const std::string & path) override; + + void removeObjectsIfExist(const std::vector & paths) override; + + ObjectMetadata getObjectMetadata(const std::string & path) const override; + + void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) override; + + void shutdown() override; + + void startup() override; + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + + String getObjectsNamespace() const override { return bucket; } + + std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + + +private: + const Poco::Util::AbstractConfiguration & config; + + HDFSBuilderWrapper hdfs_builder; + HDFSFSPtr hdfs_fs; + + SettingsPtr settings; + + +}; + +} + +#endif diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index e4a0b84448c..51e380df6df 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -49,14 +50,6 @@ class ReadBufferFromFileBase; class WriteBufferFromFileBase; class MMappedFileCache; -/** - * Mode of opening a file for write. - */ -enum class WriteMode -{ - Rewrite, - Append -}; /** * Provide interface for reservation. 
diff --git a/src/Disks/IDiskObjectStorage.h b/src/Disks/IDiskObjectStorage.h new file mode 100644 index 00000000000..90794301e54 --- /dev/null +++ b/src/Disks/IDiskObjectStorage.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +namespace DB +{ + +} diff --git a/src/Disks/IObjectStorage.h b/src/Disks/IObjectStorage.h index 64ba6e75281..e5eb08f145d 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/IObjectStorage.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -80,6 +81,7 @@ public: /// Open the file for write and return WriteBufferFromFileBase object. virtual std::unique_ptr writeObject( /// NOLINT const std::string & path, + WriteMode mode, std::optional attributes = {}, FinalizeCallback && finalize_callback = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/S3/diskSettings.cpp index 4ff322b5b54..9f170799bb9 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/S3/diskSettings.cpp @@ -22,7 +22,6 @@ std::unique_ptr getSettings(const Poco::Util::AbstractC return std::make_unique( rw_settings, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".thread_pool_size", 16), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index de1194d6daf..fda1a1f51b0 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -80,13 +80,15 @@ void registerDiskS3(DiskFactory & factory) FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); ObjectStoragePtr s3_storage = std::make_unique( std::move(cache), getClient(config, config_prefix, context), getSettings(config, config_prefix, context), uri.version_id, uri.bucket); + bool send_metadata = 
config.getBool(config_prefix + ".send_metadata", false); + uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); + std::shared_ptr s3disk = std::make_shared( name, uri.key, @@ -94,7 +96,8 @@ void registerDiskS3(DiskFactory & factory) metadata_disk, std::move(s3_storage), DiskType::S3, - send_metadata); + send_metadata, + copy_thread_pool_size); /// This code is used only to check access to the corresponding disk. if (!config.getBool(config_prefix + ".skip_access_check", false)) @@ -122,7 +125,6 @@ void registerDiskS3(DiskFactory & factory) disk_result = wrapWithCache(disk_result, "s3-cache", cache_path, metadata_path); } - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "DONE DISK"); return std::make_shared(disk_result); }; factory.registerDiskType("s3", creator); diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 3f26937a29b..3feea30ace3 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -133,6 +133,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::unique_ptr S3ObjectStorage::writeObject( /// NOLINT const std::string & path, + WriteMode /* mode */, // S3 doesn't support append, only rewrite std::optional attributes, FinalizeCallback && finalize_callback, size_t buf_size, diff --git a/src/Disks/S3ObjectStorage.h b/src/Disks/S3ObjectStorage.h index fcc99ae1d91..12a8930c596 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/S3ObjectStorage.h @@ -1,10 +1,10 @@ #pragma once -#include #include #if USE_AWS_S3 +#include #include #include #include @@ -22,12 +22,10 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings( const S3Settings::ReadWriteSettings & s3_settings_, uint64_t min_bytes_for_seek_, - uint64_t thread_pool_size_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_) : s3_settings(s3_settings_) , min_bytes_for_seek(min_bytes_for_seek_) - , thread_pool_size(thread_pool_size_) , list_object_keys_size(list_object_keys_size_) , 
objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} @@ -35,7 +33,6 @@ struct S3ObjectStorageSettings S3Settings::ReadWriteSettings s3_settings; uint64_t min_bytes_for_seek; - uint64_t thread_pool_size; int32_t list_object_keys_size; int32_t objects_chunk_size_to_delete; }; @@ -75,6 +72,7 @@ public: /// Open the file for write and return WriteBufferFromFileBase object. std::unique_ptr writeObject( /// NOLINT const std::string & path, + WriteMode mode, std::optional attributes = {}, FinalizeCallback && finalize_callback = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Disks/WriteMode.h b/src/Disks/WriteMode.h new file mode 100644 index 00000000000..4a73e92ccab --- /dev/null +++ b/src/Disks/WriteMode.h @@ -0,0 +1,15 @@ +#pragma once + +namespace DB +{ + +/** + * Mode of opening a file for write. + */ +enum class WriteMode +{ + Rewrite, + Append +}; + +} From d8ad9ad2a6953559ce2403668caf11f776055968 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 17 May 2022 09:27:03 +0800 Subject: [PATCH 179/615] update codes --- src/Storages/Hive/StorageHive.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 1bacb9cb72e..1d0fe1728a8 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -151,7 +151,7 @@ public: { if (!reader) { - if (current_file_remained_rows) [[unlikely]] + if (current_file_remained_rows) { return generateChunkByPartitionKeys(); } @@ -300,15 +300,6 @@ public: auto col_idx = sample_block.getPositionByName(names[i]); cols.insert(cols.begin() + col_idx, col); } - - if (source_info->need_file_column) - { - size_t last_slash_pos = current_file->getPath().find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto col = DataTypeLowCardinality{std::make_shared()}.createColumnConst(rows, std::move(file_name)); - cols.push_back(col); - } return Chunk(std::move(cols), rows); 
} From f50803b0b8b9f1ec5cbf0f33c0e52e6f18a12778 Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 17 May 2022 02:18:09 +0000 Subject: [PATCH 180/615] update test --- .../01078_window_view_alter_query_watch.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 96abfda12c9..05961a8468b 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -39,22 +39,29 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send( - "CREATE TABLE 01078_window_view_alter_query_watch.mt(a Int32) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE 01078_window_view_alter_query_watch.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv AS SELECT count(a) AS count FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) - client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1)") + client1.expect("Progress: 0.00 rows.*\)") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:00');" + ) + client2.expect("Ok.") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:05');" + ) client2.expect("Ok.") client1.expect("1" + 
end_of_block) client1.expect("Progress: 1.00 rows.*\)") client2.send( - "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) AS count FROM 01078_window_view_alter_query_watch.mt GROUP BY tumble(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client2.expect("Ok.") client2.expect(prompt) @@ -62,7 +69,14 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client3.send("WATCH 01078_window_view_alter_query_watch.wv") client3.expect("Query id" + end_of_block) - client2.send("INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1)") + client3.expect("Progress: 0.00 rows.*\)") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:06');" + ) + client2.expect("Ok.") + client2.send( + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:10');" + ) client2.expect("Ok.") client3.expect("2" + end_of_block) client3.expect("Progress: 1.00 rows.*\)") From 3a32a22f398c8015c315a13534b476069c6e653d Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 17 May 2022 11:55:30 +0800 Subject: [PATCH 181/615] support using multi disks for caching hive files --- .../Cache/ExternalDataSourceCache.cpp | 76 ++++++++++++++----- src/Storages/Cache/ExternalDataSourceCache.h | 2 +- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index 2440b518568..f5342eb9cbc 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -14,6 +15,10 @@ #include #include #include +#include "Core/Types.h" +#include 
"base/types.h" +#include +#include namespace ProfileEvents { @@ -162,28 +167,56 @@ ExternalDataSourceCache & ExternalDataSourceCache::instance() void ExternalDataSourceCache::recoverTask() { std::vector invalid_paths; - for (auto const & group_dir : fs::directory_iterator{root_dir}) + for (size_t i = 0, sz = root_dirs.size(); i < sz; ++i) { - for (auto const & cache_dir : fs::directory_iterator{group_dir.path()}) + const auto & root_dir = root_dirs[i]; + for (auto const & group_dir : fs::directory_iterator{root_dir}) { - String path = cache_dir.path(); - auto cache_controller = RemoteCacheController::recover(path); - if (!cache_controller) + for (auto const & cache_dir : fs::directory_iterator{group_dir.path()}) { - invalid_paths.emplace_back(path); - continue; - } - auto cache_load_func = [&] { return cache_controller; }; - if (!lru_caches->getOrSet(path, cache_load_func)) - { - invalid_paths.emplace_back(path); + String subpath = cache_dir.path().stem(); + String path = cache_dir.path(); + size_t root_dir_idx = ConsistentHashing(sipHash64(subpath.c_str(), subpath.size()), sz); + if (root_dir_idx != i) + { + // When the root_dirs has been changed, to simplify just delete the old cached files. + LOG_TRACE( + log, + "Drop file({}) since root_dir is not match. 
prev dir is {}, and it should be {}", + path, + root_dirs[i], + root_dirs[root_dir_idx]); + invalid_paths.emplace_back(path); + continue; + } + auto cache_controller = RemoteCacheController::recover(path); + if (!cache_controller) + { + invalid_paths.emplace_back(path); + continue; + } + auto cache_load_func = [&] { return cache_controller; }; + if (!lru_caches->getOrSet(path, cache_load_func)) + { + invalid_paths.emplace_back(path); + } } } } for (auto & path : invalid_paths) fs::remove_all(path); initialized = true; - LOG_INFO(log, "Recovered from directory:{}", root_dir); + + auto root_dirs_to_string = [&]() + { + String res; + for (const auto & root_dir : root_dirs) + { + res += root_dir + ","; + } + return res; + }; + LOG_INFO(log, "Recovered from directory:{}", root_dirs_to_string()); } void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_) @@ -195,14 +228,22 @@ void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_d } LOG_INFO( log, "Initializing local cache for remote data sources. Local cache root path: {}, cache size limit: {}", root_dir_, limit_size_); - root_dir = root_dir_; + Poco::StringTokenizer tokenizer(root_dir_, ","); + for (size_t i = 0; i < tokenizer.count(); ++i) + { + root_dirs.emplace_back(tokenizer[i]); + } + std::sort(root_dirs.begin(), root_dirs.end()); local_cache_bytes_read_before_flush = bytes_read_before_flush_; lru_caches = std::make_unique(limit_size_); /// Create if root_dir not exists. 
- if (!fs::exists(fs::path(root_dir))) + for (const auto & root_dir : root_dirs) { - fs::create_directories(fs::path(root_dir)); + if (!fs::exists(fs::path(root_dir))) + { + fs::create_directories(fs::path(root_dir)); + } } recover_task_holder = context->getSchedulePool().createTask("recover local cache metadata for remote files", [this] { recoverTask(); }); @@ -215,7 +256,8 @@ String ExternalDataSourceCache::calculateLocalPath(IRemoteFileMetadataPtr metada String full_path = metadata->getName() + ":" + metadata->remote_path + ":" + metadata->getVersion(); UInt128 hashcode = sipHash128(full_path.c_str(), full_path.size()); String hashcode_str = getHexUIntLowercase(hashcode); - return fs::path(root_dir) / hashcode_str.substr(0, 3) / hashcode_str; + size_t root_dir_idx = ConsistentHashing(sipHash64(hashcode_str.c_str(), hashcode_str.size()), root_dirs.size()); + return fs::path(root_dirs[root_dir_idx]) / hashcode_str.substr(0, 3) / hashcode_str; } std::pair, std::unique_ptr> ExternalDataSourceCache::createReader( diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index ec0aeea4985..18d3d5ca699 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -83,7 +83,7 @@ protected: private: // Root directory of local cache for remote filesystem. 
- String root_dir; + Strings root_dirs; size_t local_cache_bytes_read_before_flush = 0; std::atomic initialized = false; From 319115cc636dc823cc6be72b01b155f030249bf0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 17 May 2022 11:57:58 +0800 Subject: [PATCH 182/615] update test case --- tests/integration/test_hive_query/configs/config.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/configs/config.xml b/tests/integration/test_hive_query/configs/config.xml index e89ccdcab6a..7de1391e56c 100644 --- a/tests/integration/test_hive_query/configs/config.xml +++ b/tests/integration/test_hive_query/configs/config.xml @@ -13,7 +13,7 @@ true - /tmp/clickhouse_local_cache + /tmp/clickhouse_local_cache,/tmp/clickhouse_local_cache1 207374182400 1048576 From bc81302bf456367b25fa94ff5268266990e0be6a Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 17 May 2022 12:10:55 +0800 Subject: [PATCH 183/615] fixed code-style --- src/Storages/Cache/ExternalDataSourceCache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index f5342eb9cbc..c408e82a668 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -15,8 +15,8 @@ #include #include #include -#include "Core/Types.h" -#include "base/types.h" +#include +#include #include #include From 90bac2f004d467f6d0d46f78c5bb2881c361567e Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 17 May 2022 06:20:08 +0000 Subject: [PATCH 184/615] add shutdown check --- src/Storages/WindowView/StorageWindowView.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 265c1b4f1d8..e5f19cc1849 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ 
-913,7 +913,8 @@ void StorageWindowView::threadFuncCleanup() { try { - cleanup(); + if (!shutdown_called) + cleanup(); } catch (...) { @@ -926,6 +927,9 @@ void StorageWindowView::threadFuncCleanup() void StorageWindowView::threadFuncFireProc() { + if (shutdown_called) + return; + std::unique_lock lock(fire_signal_mutex); UInt32 timestamp_now = std::time(nullptr); From 573e42d0ddb523edf7c4765e667dd21e5ae71ffa Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 17 May 2022 06:45:51 +0000 Subject: [PATCH 185/615] Address PR comments --- src/Coordination/KeeperStorage.cpp | 85 ++++++++++++++++-------------- src/Coordination/KeeperStorage.h | 22 ++++++-- 2 files changed, 64 insertions(+), 43 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 1a7e3743948..2a31db415a2 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -186,6 +186,9 @@ struct Overloaded : Ts... { using Ts::operator()...; }; + +// explicit deduction guide +// https://en.cppreference.com/w/cpp/language/class_template_argument_deduction template Overloaded(Ts...) 
-> Overloaded; @@ -712,19 +715,14 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce } } - const auto on_error = [&]([[maybe_unused]] const auto error_code) - { - if constexpr (local) - response.error = error_code; - else - onStorageInconsistency(); - }; - auto & container = storage.container; auto node_it = container.find(request.path); if (node_it == container.end()) { - on_error(Coordination::Error::ZNONODE); + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { @@ -856,19 +854,14 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr } } - const auto on_error = [&]([[maybe_unused]] const auto error_code) - { - if constexpr (local) - response.error = error_code; - else - onStorageInconsistency(); - }; - auto & container = storage.container; auto node_it = container.find(request.path); if (node_it == container.end()) { - on_error(Coordination::Error::ZNONODE); + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { @@ -1008,19 +1001,14 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc } } - const auto on_error = [&]([[maybe_unused]] const auto error_code) - { - if constexpr (local) - response.error = error_code; - else - onStorageInconsistency(); - }; - auto & container = storage.container; auto node_it = container.find(request.path); if (node_it == container.end()) { - on_error(Coordination::Error::ZNONODE); + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { @@ -1158,9 +1146,22 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr if (!fixupACL(request.acls, session_auth_ids, node_acls)) return {{zxid, Coordination::Error::ZINVALIDACL}}; - return { - {request.path, zxid, KeeperStorage::SetACLDelta{std::move(node_acls), request.version}}, - {request.path, 
zxid, KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & n) { ++n.stat.aversion; }}}}; + return + { + { + request.path, + zxid, + KeeperStorage::SetACLDelta{std::move(node_acls), request.version} + }, + { + request.path, + zxid, + KeeperStorage::UpdateNodeDelta + { + [](KeeperStorage::Node & n) { ++n.stat.aversion; } + } + } + }; } Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override @@ -1221,19 +1222,14 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr } } - const auto on_error = [&]([[maybe_unused]] const auto error_code) - { - if constexpr (local) - response.error = error_code; - else - onStorageInconsistency(); - }; - auto & container = storage.container; auto node_it = container.find(request.path); if (node_it == container.end()) { - on_error(Coordination::Error::ZNONODE); + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { @@ -1338,6 +1334,8 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); auto & deltas = storage.uncommitted_state.deltas; + // the deltas will have atleast SubDeltaEnd or FailedMultiDelta + assert(!deltas.empty()); if (auto * failed_multi = std::get_if(&deltas.front().operation)) { for (size_t i = 0; i < concrete_requests.size(); ++i) @@ -1559,6 +1557,9 @@ void KeeperStorage::preprocessRequest( { for (const auto & ephemeral_path : session_ephemerals->second) { + // For now just add deltas for removing the node + // On commit, ephemerals nodes will be deleted from storage + // and removed from the session if (uncommitted_state.hasNode(ephemeral_path)) { deltas.emplace_back( @@ -1656,6 +1657,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( if (is_local) { + assert(!zk_request->isReadRequest()); if (check_acl && 
!request_processor->checkAuth(*this, session_id, true)) { response = zk_request->makeResponse(); @@ -1713,7 +1715,10 @@ void KeeperStorage::rollbackRequest(int64_t rollback_zxid) { // we can only rollback the last zxid (if there is any) // if there is a delta with a larger zxid, we have invalid state - assert(uncommitted_state.deltas.empty() || uncommitted_state.deltas.back().zxid <= rollback_zxid); + const auto last_zxid = uncommitted_state.deltas.back().zxid; + if (!uncommitted_state.deltas.empty() && last_zxid > rollback_zxid) + throw DB::Exception{DB::ErrorCodes::LOGICAL_ERROR, "Invalid state of deltas found while trying to rollback request. Last ZXID ({}) is larger than the requested ZXID ({})", last_zxid, rollback_zxid}; + std::erase_if(uncommitted_state.deltas, [rollback_zxid](const auto & delta) { return delta.zxid == rollback_zxid; }); } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 09ca731f21e..e7791192724 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -100,6 +100,16 @@ public: /// container. 
Container container; + // Applying ZooKeeper request to storage consists of two steps: + // - preprocessing which, instead of applying the changes directly to storage, + // generates deltas with those changes, denoted with the request ZXID + // - processing which applies deltas with the correct ZXID to the storage + // + // Delta objects allow us two things: + // - fetch the latest, uncommitted state of an object by getting the committed + // state of that same object from the storage and applying the deltas + // in the same order as they are defined + // - quickly commit the changes to the storage struct CreateNodeDelta { Coordination::Stat stat; @@ -177,8 +187,7 @@ public: } } - template - bool hasACL(int64_t session_id, bool is_local, Predicate predicate) + bool hasACL(int64_t session_id, bool is_local, std::function predicate) { for (const auto & session_auth : storage.session_and_auth[session_id]) { @@ -192,7 +201,7 @@ public: for (const auto & delta : deltas) { - if (auto * auth_delta = std::get_if(&delta.operation); + if (const auto * auth_delta = std::get_if(&delta.operation); auth_delta && auth_delta->session_id == session_id && predicate(auth_delta->auth_id)) return true; } @@ -212,6 +221,9 @@ public: Coordination::Error commit(int64_t zxid, int64_t session_id); + // Create node in the storage + // Returns false if it failed to create the node, true otherwise + // We don't care about the exact failure because we should've caught it during preprocessing bool createNode( const std::string & path, String data, @@ -220,6 +232,10 @@ public: bool is_ephemeral, Coordination::ACLs node_acls, int64_t session_id); + + // Remove node in the storage + // Returns false if it failed to remove the node, true otherwise + // We don't care about the exact failure because we should've caught it during preprocessing bool removeNode(const std::string & path, int32_t version); bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local); From 
70c0adef0b3b2f3d3961902bfb921e972052b544 Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 17 May 2022 07:26:52 +0000 Subject: [PATCH 186/615] update test --- .../0_stateless/01078_window_view_alter_query_watch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 05961a8468b..93e92107f48 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -51,17 +51,17 @@ with client(name="client1>", log=log) as client1, client( client1.expect("Query id" + end_of_block) client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:00');" + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) client2.expect("Ok.") client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:05');" + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));" ) client2.expect("Ok.") client1.expect("1" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") client2.send( - "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client2.expect("Ok.") client2.expect(prompt) @@ -71,11 +71,11 @@ with client(name="client1>", log=log) as client1, client( client3.expect("Query id" + end_of_block) 
client3.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:06');" + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) client2.expect("Ok.") client2.send( - "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, '1990/01/01 12:00:10');" + "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" ) client2.expect("Ok.") client3.expect("2" + end_of_block) From e03d6009cc34756c08db19fcc67f61b88d2ba8be Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 17 May 2022 08:26:22 +0000 Subject: [PATCH 187/615] update tests --- .../0_stateless/01056_window_view_proc_hop_watch.py | 3 ++- .../01059_window_view_event_hop_watch_strict_asc.py | 1 + .../0_stateless/01062_window_view_event_hop_watch_asc.py | 9 +++++---- .../0_stateless/01069_window_view_proc_tumble_watch.py | 1 + .../0_stateless/01070_window_view_watch_events.py | 5 +++-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 772ac04b287..68419dd0422 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -43,8 +43,9 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01056_window_view_proc_hop_watch.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 1)" + "INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client1.expect("1" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py 
b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 75314d1ca84..70573d4fa83 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -41,6 +41,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH db_01059_event_hop_watch_strict_asc.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( "INSERT INTO db_01059_event_hop_watch_strict_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index 57ef6f98165..a3eaabd5f23 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -45,21 +45,22 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01062_window_view_event_hop_watch_asc.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:00');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) client2.expect(prompt) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:05');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));" ) client2.expect(prompt) client1.expect("1*" + end_of_block) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:06');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) client2.expect(prompt) client2.send( - "INSERT INTO 
01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:10');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" ) client2.expect(prompt) client1.expect("1" + end_of_block) diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index 24a6ccd157e..7b7d05c92db 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -45,6 +45,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01069_window_view_proc_tumble_watch.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( "INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)" ) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index c4d36bc51a2..f8782e5e7ce 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -45,12 +45,13 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01070_window_view_watch_events.wv EVENTS") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, '1990/01/01 12:00:00');" + "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) client2.expect("Ok.") client2.send( - "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, '1990/01/01 12:00:06');" + "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) client2.expect("Ok.") client1.expect("1990-01-01 12:00:05" + end_of_block) From e75aa445a65482fb3404e49513961b507e6dc699 
Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 11:36:12 +0200 Subject: [PATCH 188/615] Update src/Coordination/KeeperStorage.h Co-authored-by: Antonio Andelic --- src/Coordination/KeeperStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index e7791192724..7d26ae24dd9 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -187,7 +187,7 @@ public: } } - bool hasACL(int64_t session_id, bool is_local, std::function predicate) + bool hasACL(int64_t session_id, bool is_local, std::function predicate) { for (const auto & session_auth : storage.session_and_auth[session_id]) { From f958203b6c6ba61a2373ec477092432ae454fc6d Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 11:55:54 +0200 Subject: [PATCH 189/615] Update src/Coordination/KeeperStorage.cpp Co-authored-by: Antonio Andelic --- src/Coordination/KeeperStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 2a31db415a2..b28a0bc6911 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1657,7 +1657,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( if (is_local) { - assert(!zk_request->isReadRequest()); + assert(zk_request->isReadRequest()); if (check_acl && !request_processor->checkAuth(*this, session_id, true)) { response = zk_request->makeResponse(); From 639ceb84b1c72a59931eb55c03cba6f3ae62e77c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 17 May 2022 10:15:12 +0000 Subject: [PATCH 190/615] Add comment and fix typo --- src/Coordination/KeeperStorage.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index b28a0bc6911..6c0699be95c 100644 --- a/src/Coordination/KeeperStorage.cpp +++ 
b/src/Coordination/KeeperStorage.cpp @@ -301,6 +301,9 @@ namespace Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_id) { + // Deltas are added with increasing ZXIDs + // If there are no deltas for the commit_zxid (e.g. read requests), we instantly return + // on first delta for (auto & delta : uncommitted_state.deltas) { if (delta.zxid > commit_zxid) @@ -1334,7 +1337,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); auto & deltas = storage.uncommitted_state.deltas; - // the deltas will have atleast SubDeltaEnd or FailedMultiDelta + // the deltas will have at least SubDeltaEnd or FailedMultiDelta assert(!deltas.empty()); if (auto * failed_multi = std::get_if(&deltas.front().operation)) { From e4bc7e9979ce08079cdb7df368d220f4ef7a6972 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 17 May 2022 19:31:59 +0800 Subject: [PATCH 191/615] use splitInto instead of Poco::Tokenizer --- src/Storages/Cache/ExternalDataSourceCache.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index c408e82a668..84ae35451ea 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include namespace ProfileEvents { @@ -228,11 +228,7 @@ void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_d } LOG_INFO( log, "Initializing local cache for remote data sources. 
Local cache root path: {}, cache size limit: {}", root_dir_, limit_size_); - Poco::StringTokenizer tokenizer(root_dir_, ","); - for (size_t i = 0; i < tokenizer.count(); ++i) - { - root_dirs.emplace_back(tokenizer[i]); - } + splitInto<','>(root_dirs, root_dir_); std::sort(root_dirs.begin(), root_dirs.end()); local_cache_bytes_read_before_flush = bytes_read_before_flush_; lru_caches = std::make_unique(limit_size_); From 1c90f326c24dc866651c11a0c8ad6606235af220 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 15:25:05 +0200 Subject: [PATCH 192/615] Remove DiskHDFS --- src/Disks/DiskObjectStorage.cpp | 6 +- src/Disks/HDFS/DiskHDFS.cpp | 143 ---------------------------- src/Disks/HDFS/DiskHDFS.h | 84 ---------------- src/Disks/HDFS/registerDiskHDFS.cpp | 55 +++++++++++ src/Disks/HDFSObjectStorage.cpp | 17 +++- src/Disks/HDFSObjectStorage.h | 3 +- src/Disks/IObjectStorage.cpp | 2 +- src/Disks/S3ObjectStorage.cpp | 5 +- 8 files changed, 77 insertions(+), 238 deletions(-) delete mode 100644 src/Disks/HDFS/DiskHDFS.cpp delete mode 100644 src/Disks/HDFS/DiskHDFS.h create mode 100644 src/Disks/HDFS/registerDiskHDFS.cpp diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index b0679051eec..bfec350caba 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -777,7 +777,7 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, object_attributes, create_metadata_callback, buf_size, settings); + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, WriteMode::Rewrite, object_attributes, create_metadata_callback, buf_size, settings); } @@ -848,7 +848,7 @@ DiskObjectStorageReservation::~DiskObjectStorageReservation() void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & 
operation_name, UInt64 revision, const ObjectAttributes & metadata) const { const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; - auto buf = disk->object_storage->writeObject(path, metadata); + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); buf->write('0'); buf->finalize(); } @@ -892,7 +892,7 @@ void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) con { auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; - auto buf = disk->object_storage->writeObject(path); + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite); writeIntText(version, *buf); buf->finalize(); diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp deleted file mode 100644 index b8e482f623c..00000000000 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include - -#if USE_HDFS - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; -} - - -DiskHDFS::DiskHDFS( - const String & disk_name_, - const String & hdfs_root_path_, - SettingsPtr settings_, - DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_) - : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, nullptr, "DiskHDFS", settings_->thread_pool_size) - , config(config_) - , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) - , hdfs_fs(createHDFSFS(hdfs_builder.get())) - , settings(std::move(settings_)) -{ -} - - -std::unique_ptr DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional, std::optional) const -{ - auto metadata = readMetadata(path); - - LOG_TEST(log, - "Read from file by path: {}. 
Existing HDFS objects: {}", - backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - - auto hdfs_impl = std::make_unique(config, remote_fs_root_path, remote_fs_root_path, metadata.remote_fs_objects, read_settings); - auto buf = std::make_unique(std::move(hdfs_impl)); - return std::make_unique(std::move(buf), settings->min_bytes_for_seek); -} - - -std::unique_ptr DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) -{ - /// Path to store new HDFS object. - std::string file_name = getRandomName(); - std::string hdfs_path = fs::path(remote_fs_root_path) / file_name; - - LOG_TRACE(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", - backQuote(metadata_disk->getPath() + path), hdfs_path); - - /// Single O_WRONLY in libhdfs adds O_TRUNC - auto hdfs_buffer = std::make_unique(hdfs_path, - config, settings->replication, buf_size, - mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); - auto create_metadata_callback = [this, path, mode, file_name] (size_t count) - { - readOrCreateUpdateAndStoreMetadata(path, mode, false, [file_name, count] (Metadata & metadata) { metadata.addObject(file_name, count); return true; }); - }; - - return std::make_unique(std::move(hdfs_buffer), std::move(create_metadata_callback), hdfs_path); -} - -void DiskHDFS::removeFromRemoteFS(const std::vector & paths) -{ - for (const auto & hdfs_path : paths) - { - const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); - - /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); - if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); - } -} - -bool DiskHDFS::checkUniqueId(const String & hdfs_uri) const -{ - if (!boost::algorithm::starts_with(hdfs_uri, remote_fs_root_path)) - return false; - const size_t begin_of_path = hdfs_uri.find('/', 
hdfs_uri.find("//") + 2); - const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); - return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); -} - -namespace -{ -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings) -{ - return std::make_unique( - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), - settings.hdfs_replication); -} -} - -void registerDiskHDFS(DiskFactory & factory) -{ - auto creator = [](const String & name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextPtr context_, - const DisksMap & /*map*/) -> DiskPtr - { - String uri{config.getString(config_prefix + ".endpoint")}; - checkHDFSURL(uri); - - if (uri.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); - - auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; - - return std::make_shared( - name, uri, - getSettings(config, config_prefix, context_->getSettingsRef()), - metadata_disk, config); - }; - - factory.registerDiskType("hdfs", creator); -} - -} -#endif diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h deleted file mode 100644 index 5c6e011dc96..00000000000 --- a/src/Disks/HDFS/DiskHDFS.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include - -#if USE_HDFS - -#include -#include -#include -#include - - -namespace DB -{ - -struct DiskHDFSSettings -{ - size_t min_bytes_for_seek; - int thread_pool_size; - int objects_chunk_size_to_delete; - int replication; - - DiskHDFSSettings( - int min_bytes_for_seek_, - int thread_pool_size_, - int objects_chunk_size_to_delete_, - int replication_) - : min_bytes_for_seek(min_bytes_for_seek_) - , thread_pool_size(thread_pool_size_) - , 
objects_chunk_size_to_delete(objects_chunk_size_to_delete_) - , replication(replication_) {} -}; - - -/** - * Storage for persisting data in HDFS and metadata on the local disk. - * Files are represented by file in local filesystem (clickhouse_root/disks/disk_name/path/to/file) - * that contains HDFS object key with actual data. - */ -class DiskHDFS final : public IDiskRemote -{ -public: - using SettingsPtr = std::unique_ptr; - - DiskHDFS( - const String & disk_name_, - const String & hdfs_root_path_, - SettingsPtr settings_, - DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_); - - DiskType getType() const override { return DiskType::HDFS; } - bool isRemote() const override { return true; } - - bool supportZeroCopyReplication() const override { return true; } - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; - - void removeFromRemoteFS(const std::vector & paths) override; - - /// Check file exists and ClickHouse has an access to it - /// Overrode in remote disk - /// Required for remote disk to ensure that replica has access to data written by other node - bool checkUniqueId(const String & hdfs_uri) const override; - -private: - String getRandomName() { return toString(UUIDHelpers::generateV4()); } - - const Poco::Util::AbstractConfiguration & config; - - HDFSBuilderWrapper hdfs_builder; - HDFSFSPtr hdfs_fs; - - SettingsPtr settings; -}; - -} -#endif diff --git a/src/Disks/HDFS/registerDiskHDFS.cpp b/src/Disks/HDFS/registerDiskHDFS.cpp new file mode 100644 index 00000000000..f67f6fbb440 --- /dev/null +++ b/src/Disks/HDFS/registerDiskHDFS.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +void 
registerDiskHDFS(DiskFactory & factory) +{ + auto creator = [](const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context_, + const DisksMap & /*map*/) -> DiskPtr + { + String uri{config.getString(config_prefix + ".endpoint")}; + checkHDFSURL(uri); + + if (uri.back() != '/') + throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); + + std::unique_ptr settings = std::make_unique( + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), + context_->getSettingsRef().hdfs_replication + ); + FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context_); + + ObjectStoragePtr hdfs_storage = std::make_unique(std::move(cache), uri, std::move(settings), config); + + auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; + uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); + + return std::make_shared( + name, + uri, + "DiskHDFS", + metadata_disk, + std::move(hdfs_storage), + DiskType::HDFS, + /* send_metadata = */ false, + copy_thread_pool_size); + }; + + factory.registerDiskType("hdfs", creator); +} + +} diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index 646ec6c8bd4..ad3ecf47bbb 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -1,5 +1,7 @@ #include + #include +#include #include #include #include @@ -42,9 +44,7 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT std::optional, std::optional) const { - auto buf = std::make_unique(path, path, config, read_settings.remote_fs_buffer_size); - - return std::make_unique(std::move(buf), settings->min_bytes_for_seek); + return std::make_unique(path, path, config, read_settings.remote_fs_buffer_size); } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT @@ 
-153,12 +153,21 @@ void HDFSObjectStorage::copyObject( /// NOLINT throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); auto in = readObject(object_from); - auto out = writeObject(object_to); + auto out = writeObject(object_to, WriteMode::Rewrite); copyData(*in, *out); out->finalize(); } +void HDFSObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +{ +} + +std::unique_ptr HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +{ + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning"); +} + } #endif diff --git a/src/Disks/HDFSObjectStorage.h b/src/Disks/HDFSObjectStorage.h index 397741d2c4a..3c1bac02ee3 100644 --- a/src/Disks/HDFSObjectStorage.h +++ b/src/Disks/HDFSObjectStorage.h @@ -99,11 +99,10 @@ public: void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; - String getObjectsNamespace() const override { return bucket; } + String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; - private: const Poco::Util::AbstractConfiguration & config; diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/IObjectStorage.cpp index 538cc702791..1997022d05c 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/IObjectStorage.cpp @@ -41,7 +41,7 @@ void IObjectStorage::copyObjectToAnotherObjectStorage(const std::string & object copyObject(object_from, object_to, object_to_attributes); auto in = readObject(object_from); - auto out = object_storage_to.writeObject(object_to); + auto out = object_storage_to.writeObject(object_to, WriteMode::Rewrite); copyData(*in, *out); 
out->finalize(); } diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index 3feea30ace3..fe7c73b20cb 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -133,12 +133,15 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::unique_ptr S3ObjectStorage::writeObject( /// NOLINT const std::string & path, - WriteMode /* mode */, // S3 doesn't support append, only rewrite + WriteMode mode, // S3 doesn't support append, only rewrite std::optional attributes, FinalizeCallback && finalize_callback, size_t buf_size, const WriteSettings & write_settings) { + if (mode != WriteMode::Rewrite) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); + bool cache_on_write = cache && fs::path(path).extension() != ".tmp" && write_settings.enable_filesystem_cache_on_write_operations From f9cd8208540a9d6783c732b0d0dcdea1d2910211 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 15:39:46 +0200 Subject: [PATCH 193/615] Fix style --- src/Common/ErrorCodes.cpp | 1 + src/Disks/HDFSObjectStorage.cpp | 10 +++++----- src/Disks/S3ObjectStorage.cpp | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index eb84e24b713..aabc3c8e8dc 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -624,6 +624,7 @@ M(653, CANNOT_PARSE_BACKUP_SETTINGS) \ M(654, WRONG_BACKUP_SETTINGS) \ M(655, FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE) \ + M(656, HDFS_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index ad3ecf47bbb..cbd89bcca88 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -19,7 +19,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int LOGICAL_ERROR; + extern const int HDFS_ERROR; } void HDFSObjectStorage::shutdown() @@ -86,7 +86,7 @@ void 
HDFSObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & c int32_t num_entries; auto * files_list = hdfsListDirectory(hdfs_fs.get(), path.substr(begin_of_path).c_str(), &num_entries); if (num_entries == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + path); for (int32_t i = 0; i < num_entries; ++i) children.emplace_back(files_list[i].mName, files_list[i].mSize); @@ -100,7 +100,7 @@ void HDFSObjectStorage::removeObject(const std::string & path) /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0); if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + path); } @@ -113,7 +113,7 @@ void HDFSObjectStorage::removeObjects(const std::vector & paths) /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); } } @@ -135,7 +135,7 @@ void HDFSObjectStorage::removeObjectsIfExist(const std::vector & pa /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); if (res == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HDFSDelete failed with path: " + hdfs_path); + throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); } } diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index fe7c73b20cb..f09bbc28474 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -34,6 +34,7 @@ namespace DB namespace ErrorCodes { extern const int S3_ERROR; + extern const int BAD_ARGUMENTS; } namespace 
From 5872781ac60678b3792fbb711fe47ed7a7528239 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 May 2022 15:50:30 +0200 Subject: [PATCH 194/615] Merge with master --- src/Common/ErrorCodes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index de8b5ec19f8..203c4636b72 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -627,7 +627,7 @@ M(656, MEILISEARCH_EXCEPTION) \ M(657, UNSUPPORTED_MEILISEARCH_TYPE) \ M(658, MEILISEARCH_MISSING_SOME_COLUMNS) \ - M(656, HDFS_ERROR) \ + M(659, HDFS_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ From 037db32e24529b30642481459bb1f6475465b769 Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 17 May 2022 13:50:55 +0000 Subject: [PATCH 195/615] set alter query internal --- src/Storages/WindowView/StorageWindowView.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e5f19cc1849..b90d5756e72 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -902,7 +902,8 @@ inline void StorageWindowView::cleanup() auto alter_query = getCleanupQuery(); auto cleanup_context = Context::createCopy(getContext()); - cleanup_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + cleanup_context->getClientInfo().setInitialQuery(); + cleanup_context->setInternalQuery(true); InterpreterAlterQuery interpreter_alter(alter_query, cleanup_context); interpreter_alter.execute(); @@ -922,7 +923,7 @@ void StorageWindowView::threadFuncCleanup() } if (!shutdown_called) - clean_cache_task->scheduleAfter(clean_interval_ms); + clean_cache_task->scheduleAfter(1000); } void StorageWindowView::threadFuncFireProc() From 34af1cb116fc7abc3d9db8c3ea028aa2f29803e2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 16 May 2022 20:44:24 +0200 Subject: [PATCH 
196/615] Throw option WEVERYTHING out WEVERYTHING enables on Clang literally every warning. People on the internet are divided if this is a good thing or not but ClickHouse compiles with -Weverything + some exceptions for noisy warnings since at least a year. I tried to build with WEVERYTHING = OFF and the build was badly broken. It seems nobody actually turns WEVERYTHING off. Actually, why would one if the CI builds (configured with WEVERYTHING = ON) potentially generate errors not generated in local development. To simplify the build scripts and to remove the need to maintain two sets of compiler warnings, I made WEVERYTHING the default and threw WEVERYTHING = OFF out. --- cmake/warnings.cmake | 103 ++++++--------------- docs/en/development/cmake-in-clickhouse.md | 6 -- 2 files changed, 28 insertions(+), 81 deletions(-) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index e79771d2e6f..3a6b44b9170 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -9,11 +9,6 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") -# Add some warnings that are not available even with -Wall -Wextra -Wpedantic. -# Intended for exploration of new compiler warnings that may be found useful. -# Applies to clang only -option (WEVERYTHING "Enable -Weverything option with some exceptions." ON) - # Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size. # Only in release build because debug has too large stack frames. 
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")) @@ -21,81 +16,39 @@ if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE endif () if (COMPILER_CLANG) + add_warning(everything) add_warning(pedantic) no_warning(vla-extension) no_warning(zero-length-array) no_warning(c11-extensions) no_warning(unused-command-line-argument) - - if (WEVERYTHING) - add_warning(everything) - no_warning(c++98-compat-pedantic) - no_warning(c++98-compat) - no_warning(c99-extensions) - no_warning(conversion) - no_warning(ctad-maybe-unsupported) # clang 9+, linux-only - no_warning(deprecated-dynamic-exception-spec) - no_warning(disabled-macro-expansion) - no_warning(documentation-unknown-command) - no_warning(double-promotion) - no_warning(exit-time-destructors) - no_warning(float-equal) - no_warning(global-constructors) - no_warning(missing-prototypes) - no_warning(missing-variable-declarations) - no_warning(nested-anon-types) - no_warning(packed) - no_warning(padded) - no_warning(return-std-move-in-c++11) # clang 7+ - no_warning(shift-sign-overflow) - no_warning(sign-conversion) - no_warning(switch-enum) - no_warning(undefined-func-template) - no_warning(unused-template) - no_warning(vla) - no_warning(weak-template-vtables) - no_warning(weak-vtables) - - # TODO Enable conversion, sign-conversion, double-promotion warnings. 
- else () - add_warning(comma) - add_warning(conditional-uninitialized) - add_warning(covered-switch-default) - add_warning(deprecated) - add_warning(embedded-directive) - add_warning(empty-init-stmt) # linux-only - add_warning(extra-semi-stmt) # linux-only - add_warning(extra-semi) - add_warning(gnu-case-range) - add_warning(inconsistent-missing-destructor-override) - add_warning(newline-eof) - add_warning(old-style-cast) - add_warning(range-loop-analysis) - add_warning(redundant-parens) - add_warning(reserved-id-macro) - add_warning(shadow-field) - add_warning(shadow-uncaptured-local) - add_warning(shadow) - add_warning(string-plus-int) - add_warning(undef) - add_warning(unreachable-code-return) - add_warning(unreachable-code) - add_warning(unused-exception-parameter) - add_warning(unused-macros) - add_warning(unused-member-function) - add_warning(unneeded-internal-declaration) - add_warning(implicit-int-float-conversion) - add_warning(no-delete-null-pointer-checks) - add_warning(anon-enum-enum-conversion) - add_warning(assign-enum) - add_warning(bitwise-op-parentheses) - add_warning(int-in-bool-context) - add_warning(sometimes-uninitialized) - add_warning(tautological-bitwise-compare) - - # XXX: libstdc++ has some of these for 3way compare - add_warning(zero-as-null-pointer-constant) - endif () + no_warning(c++98-compat-pedantic) + no_warning(c++98-compat) + no_warning(c99-extensions) + no_warning(conversion) + no_warning(ctad-maybe-unsupported) # clang 9+, linux-only + no_warning(deprecated-dynamic-exception-spec) + no_warning(disabled-macro-expansion) + no_warning(documentation-unknown-command) + no_warning(double-promotion) + no_warning(exit-time-destructors) + no_warning(float-equal) + no_warning(global-constructors) + no_warning(missing-prototypes) + no_warning(missing-variable-declarations) + no_warning(nested-anon-types) + no_warning(packed) + no_warning(padded) + no_warning(return-std-move-in-c++11) # clang 7+ + no_warning(shift-sign-overflow) + 
no_warning(sign-conversion) + no_warning(switch-enum) + no_warning(undefined-func-template) + no_warning(unused-template) + no_warning(vla) + no_warning(weak-template-vtables) + no_warning(weak-vtables) + # TODO Enable conversion, sign-conversion, double-promotion warnings. elseif (COMPILER_GCC) # Add compiler options only to c++ compiler function(add_cxx_compile_options option) diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md index 14b98b136b3..65d280df902 100644 --- a/docs/en/development/cmake-in-clickhouse.md +++ b/docs/en/development/cmake-in-clickhouse.md @@ -420,12 +420,6 @@ Note that ClickHouse uses forks of these libraries, see https://github.com/Click Using system libs can cause a lot of warnings in includes (on macro expansion). -WEVERYTHING -ON -Enable -Weverything option with some exceptions. -Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. 
Applies to clang only - - WITH_COVERAGE OFF Profile the resulting binary/binaries From 0627a3d23a3f49c93f473cd9dd056d5d3c4731a8 Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 17 May 2022 14:24:05 +0000 Subject: [PATCH 197/615] disable parallel tests --- tests/queries/0_stateless/01056_window_view_proc_hop_watch.py | 2 ++ .../01059_window_view_event_hop_watch_strict_asc.py | 4 +++- .../0_stateless/01062_window_view_event_hop_watch_asc.py | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 68419dd0422..8580ad43ccd 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys import signal diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 70573d4fa83..44c2f211f2b 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys import signal @@ -31,7 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send( - "CREATE TABLE db_01059_event_hop_watch_strict_asc.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE db_01059_event_hop_watch_strict_asc.mt(a Int32, timestamp DateTime('US/Samoa')) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index a3eaabd5f23..ddf0c423fa9 100755 --- 
a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys import signal From c0a4af295ca5b96e3e30914ef7a0536303798ac0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 17 May 2022 19:06:13 +0200 Subject: [PATCH 198/615] Cosmetics --- cmake/ccache.cmake | 4 +++- cmake/git_status.cmake | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 23c2ada513d..3fd99742b39 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,3 +1,5 @@ +# Setup integration with ccache to speed up builds, see https://ccache.dev/ + if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MATCHES "ccache") set(COMPILER_MATCHES_CCACHE 1) else() @@ -6,6 +8,7 @@ endif() if ((ENABLE_CCACHE OR NOT DEFINED ENABLE_CCACHE) AND NOT COMPILER_MATCHES_CCACHE) find_program (CCACHE_FOUND ccache) + if (CCACHE_FOUND) set(ENABLE_CCACHE_BY_DEFAULT 1) else() @@ -18,7 +21,6 @@ if (NOT CCACHE_FOUND AND NOT DEFINED ENABLE_CCACHE AND NOT COMPILER_MATCHES_CCAC "Setting it up will significantly reduce compilation time for 2nd and consequent builds") endif() -# https://ccache.dev/ option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ${ENABLE_CCACHE_BY_DEFAULT}) if (NOT ENABLE_CCACHE) diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake index feab4acef1d..c1047c0ccbf 100644 --- a/cmake/git_status.cmake +++ b/cmake/git_status.cmake @@ -1,17 +1,22 @@ # Print the status of the git repository (if git is available). 
# This is useful for troubleshooting build failure reports + find_package(Git) if (Git_FOUND) + execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse HEAD WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GIT_COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") + execute_process( COMMAND ${GIT_EXECUTABLE} status WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) + else() - message(STATUS "The git program could not be found.") + message(STATUS "Git could not be found.") endif() From 6db32a3ed12757b2c5c93aca4a24154d773aa754 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 17 May 2022 19:23:06 +0200 Subject: [PATCH 199/615] Fix externally set compiler launchers The compiler launcher (ccache, distcc) can be set externally via -DCMAKE_CXX_COMPILER_LAUNCHER=. We previously silently ignored this setting and continued without any launcher (e.g. ccache). Changed this to now respect the externally specified launcher. --- cmake/ccache.cmake | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 3fd99742b39..fa50cd30f47 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,12 +1,14 @@ # Setup integration with ccache to speed up builds, see https://ccache.dev/ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MATCHES "ccache") - set(COMPILER_MATCHES_CCACHE 1) -else() - set(COMPILER_MATCHES_CCACHE 0) + # custom compiler launcher already defined, most likely because cmake was invoked with like "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache" or + # via environment variable --> respect setting and trust that the launcher was specified correctly + message(STATUS "Using custom C compiler launcher: ${CMAKE_C_COMPILER_LAUNCHER}") + message(STATUS "Using custom C++ compiler launcher: ${CMAKE_CXX_COMPILER_LAUNCHER}") + return() endif() -if ((ENABLE_CCACHE OR NOT DEFINED ENABLE_CCACHE) AND NOT 
COMPILER_MATCHES_CCACHE) +if ((ENABLE_CCACHE OR NOT DEFINED ENABLE_CCACHE)) find_program (CCACHE_FOUND ccache) if (CCACHE_FOUND) @@ -16,7 +18,7 @@ if ((ENABLE_CCACHE OR NOT DEFINED ENABLE_CCACHE) AND NOT COMPILER_MATCHES_CCACHE endif() endif() -if (NOT CCACHE_FOUND AND NOT DEFINED ENABLE_CCACHE AND NOT COMPILER_MATCHES_CCACHE) +if (NOT CCACHE_FOUND AND NOT DEFINED ENABLE_CCACHE) message(WARNING "CCache is not found. We recommend setting it up if you build ClickHouse from source often. " "Setting it up will significantly reduce compilation time for 2nd and consequent builds") endif() @@ -27,7 +29,7 @@ if (NOT ENABLE_CCACHE) return() endif() -if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) +if (CCACHE_FOUND) execute_process(COMMAND ${CCACHE_FOUND} "-V" OUTPUT_VARIABLE CCACHE_VERSION) string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) @@ -53,6 +55,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) else () message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No. 
Found ${CCACHE_FOUND} (version ${CCACHE_VERSION}) but disabled because of bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") endif () -elseif (NOT CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) +elseif (NOT CCACHE_FOUND) message (${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No") endif () From 92259335b046fd06faf341b664bbb43921f5d570 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 17 May 2022 19:35:22 +0200 Subject: [PATCH 200/615] Use existing variable COMPILER_CLANG to identify Clang --- cmake/ccache.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index fa50cd30f47..910364a7448 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -33,7 +33,7 @@ if (CCACHE_FOUND) execute_process(COMMAND ${CCACHE_FOUND} "-V" OUTPUT_VARIABLE CCACHE_VERSION) string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) - if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT COMPILER_CLANG) message(STATUS "Using ccache: ${CCACHE_FOUND} (version ${CCACHE_VERSION})") set(LAUNCHER ${CCACHE_FOUND}) From 4aff310a1c875c5cc814892157af4cf02325136e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 17 May 2022 19:58:38 +0200 Subject: [PATCH 201/615] Made the ccache integration more straightforward The new logic is simpler and should do the same as before. 
--- cmake/ccache.cmake | 71 ++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 910364a7448..c0e2d41f969 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -8,53 +8,42 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA return() endif() -if ((ENABLE_CCACHE OR NOT DEFINED ENABLE_CCACHE)) - find_program (CCACHE_FOUND ccache) - - if (CCACHE_FOUND) - set(ENABLE_CCACHE_BY_DEFAULT 1) - else() - set(ENABLE_CCACHE_BY_DEFAULT 0) - endif() -endif() - -if (NOT CCACHE_FOUND AND NOT DEFINED ENABLE_CCACHE) - message(WARNING "CCache is not found. We recommend setting it up if you build ClickHouse from source often. " - "Setting it up will significantly reduce compilation time for 2nd and consequent builds") -endif() - -option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ${ENABLE_CCACHE_BY_DEFAULT}) +option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ON) if (NOT ENABLE_CCACHE) + message(STATUS "Using ccache: no (disabled via configuration)") return() endif() -if (CCACHE_FOUND) - execute_process(COMMAND ${CCACHE_FOUND} "-V" OUTPUT_VARIABLE CCACHE_VERSION) - string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) +find_program (CCACHE_EXECUTABLE ccache) - if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT COMPILER_CLANG) - message(STATUS "Using ccache: ${CCACHE_FOUND} (version ${CCACHE_VERSION})") - set(LAUNCHER ${CCACHE_FOUND}) +if (NOT CCACHE_EXECUTABLE) + message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (Could not find find ccache. To significantly reduce compile times for the 2nd, 3rd, etc. build, it is highly recommended to install ccache. To suppress this message, run cmake with -DENABLE_CCACHE=0)") + return() +endif() - # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is - # filled from the debian/changelog or current time. 
- # - # - 4.0+ ccache always includes this environment variable into the hash - # of the manifest, which do not allow to use previous cache, - # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ - # - # Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2). - if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") - message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") - set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}) - endif() +execute_process(COMMAND ${CCACHE_EXECUTABLE} "-V" OUTPUT_VARIABLE CCACHE_VERSION) +string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) - set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) - set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) - else () - message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No. Found ${CCACHE_FOUND} (version ${CCACHE_VERSION}) but disabled because of bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") - endif () -elseif (NOT CCACHE_FOUND) - message (${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No") +if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT COMPILER_CLANG) + message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") + set(LAUNCHER ${CCACHE_EXECUTABLE}) + + # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is + # filled from the debian/changelog or current time. + # + # - 4.0+ ccache always includes this environment variable into the hash + # of the manifest, which do not allow to use previous cache, + # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ + # + # Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2). 
+ if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") + message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") + set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) + endif() + + set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) + set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) +else () + message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No. Found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}) but disabled because of bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") endif () From 30a01506ede83608e708f132d49870f0e3a68912 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 17 May 2022 20:15:56 +0200 Subject: [PATCH 202/615] Remove weird check for not-Clang The check activated ccache unconditionally for all non-Clang compilers (= GCC) while allowing ancient ccache versions for these. Perhaps there was a reason for that in the past but it's simpler to only require a minimum ccache version. To simplify further, also require at least ccache 3.3 (released in 2016) instead of 3.2.1 (released in 2014). --- cmake/ccache.cmake | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index c0e2d41f969..acbb00caf56 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -25,25 +25,27 @@ endif() execute_process(COMMAND ${CCACHE_EXECUTABLE} "-V" OUTPUT_VARIABLE CCACHE_VERSION) string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) -if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT COMPILER_CLANG) - message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") - set(LAUNCHER ${CCACHE_EXECUTABLE}) +set (CCACHE_MINIMUM_VERSION 3.3) - # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is - # filled from the debian/changelog or current time. 
- # - # - 4.0+ ccache always includes this environment variable into the hash - # of the manifest, which do not allow to use previous cache, - # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ - # - # Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2). - if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") - message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") - set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) - endif() +if (CCACHE_VERSION VERSION_LESS_EQUAL ${CCACHE_MINIMUM_VERSION}) + message(FATAL_ERROR "Using ccache: no (found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}), the minimum required version is ${CCACHE_MINIMUM_VERSION}") +endif() - set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) - set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) -else () - message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No. Found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}) but disabled because of bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") -endif () +message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") +set(LAUNCHER ${CCACHE_EXECUTABLE}) + +# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is +# filled from the debian/changelog or current time. +# +# - 4.0+ ccache always includes this environment variable into the hash +# of the manifest, which do not allow to use previous cache, +# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ +# +# Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2). 
+if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") + message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") + set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) +endif() + +set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) +set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) From b929eee8db0d2c02fe82e77d91299046fc5f2ba7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 22 Apr 2022 13:37:08 +0200 Subject: [PATCH 203/615] Remove redundant CHECK_NAME from build_check.py --- .github/workflows/backport_branches.yml | 15 +++----- .github/workflows/master.yml | 51 +++++++++---------------- .github/workflows/pull_request.yml | 51 +++++++++---------------- .github/workflows/release_branches.yml | 21 ++++------ tests/ci/build_check.py | 14 +------ 5 files changed, 48 insertions(+), 104 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 0e490e9b1ab..57474c3d9dd 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -131,7 +131,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -151,7 +150,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -177,7 +176,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - 
name: Download changed images @@ -197,7 +195,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -223,7 +221,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -243,7 +240,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -269,7 +266,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -289,7 +285,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -315,7 +311,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -335,7 +330,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r 
"$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 9241c4c6c8c..6457f777289 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -199,7 +199,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -221,7 +220,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -247,7 +246,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -266,7 +264,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -291,7 +289,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=performance EOF - name: Download changed images @@ -313,7 +310,7 @@ jobs: sudo rm 
-fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -339,7 +336,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_release EOF - name: Download changed images @@ -361,7 +357,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -387,7 +383,6 @@ jobs: # IMAGES_PATH=${{runner.temp}}/images_path # REPO_COPY=${{runner.temp}}/build_check/ClickHouse # CACHES_PATH=${{runner.temp}}/../ccaches - # CHECK_NAME=ClickHouse build check (actions) # BUILD_NAME=binary_gcc # EOF # - name: Download changed images @@ -407,7 +402,7 @@ jobs: # sudo rm -fr "$TEMP_PATH" # mkdir -p "$TEMP_PATH" # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" # - name: Upload build URLs to artifacts # if: ${{ success() || failure() }} # uses: actions/upload-artifact@v2 @@ -433,7 +428,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -453,7 +447,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd 
"$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -479,7 +473,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_ubsan EOF - name: Download changed images @@ -499,7 +492,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -525,7 +518,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -545,7 +537,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -571,7 +563,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_msan EOF - name: Download changed images @@ -591,7 +582,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && 
python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -617,7 +608,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -637,7 +627,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -666,7 +656,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_splitted EOF - name: Download changed images @@ -686,7 +675,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -712,7 +701,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_tidy EOF - name: Download changed images @@ -732,7 +720,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} 
uses: actions/upload-artifact@v2 @@ -758,7 +746,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin EOF - name: Download changed images @@ -780,7 +767,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -806,7 +793,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_aarch64 EOF - name: Download changed images @@ -828,7 +814,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -854,7 +840,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_freebsd EOF - name: Download changed images @@ -876,7 +861,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -902,7 +887,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path 
REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images @@ -924,7 +908,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -950,7 +934,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_ppc64le EOF - name: Download changed images @@ -972,7 +955,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index d6619a938fa..a2be00ed9a7 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -260,7 +260,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -280,7 +279,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || 
failure() }} uses: actions/upload-artifact@v2 @@ -306,7 +305,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=performance EOF - name: Download changed images @@ -328,7 +326,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -354,7 +352,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_release EOF - name: Download changed images @@ -374,7 +371,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -400,7 +397,6 @@ jobs: # IMAGES_PATH=${{runner.temp}}/images_path # REPO_COPY=${{runner.temp}}/build_check/ClickHouse # CACHES_PATH=${{runner.temp}}/../ccaches - # CHECK_NAME=ClickHouse build check (actions) # BUILD_NAME=binary_gcc # EOF # - name: Download changed images @@ -420,7 +416,7 @@ jobs: # sudo rm -fr "$TEMP_PATH" # mkdir -p "$TEMP_PATH" # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" # - name: Upload build URLs to artifacts # if: ${{ success() || failure() }} # uses: actions/upload-artifact@v2 @@ -446,7 +442,6 @@ jobs: 
IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -466,7 +461,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -492,7 +487,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -512,7 +506,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -538,7 +532,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_ubsan EOF - name: Download changed images @@ -558,7 +551,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -584,7 +577,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse 
CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -604,7 +596,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -630,7 +622,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_msan EOF - name: Download changed images @@ -650,7 +641,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -676,7 +667,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -696,7 +686,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -725,7 +715,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) 
BUILD_NAME=binary_splitted EOF - name: Download changed images @@ -745,7 +734,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -771,7 +760,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_tidy EOF - name: Download changed images @@ -791,7 +779,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -817,7 +805,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin EOF - name: Download changed images @@ -837,7 +824,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -863,7 +850,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_aarch64 EOF - name: Download changed images @@ -883,7 +869,7 @@ jobs: sudo rm -fr 
"$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -909,7 +895,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_freebsd EOF - name: Download changed images @@ -929,7 +914,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -955,7 +940,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images @@ -975,7 +959,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -1001,7 +985,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_ppc64le EOF - name: Download changed images @@ -1021,7 +1004,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd 
"$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4d409a98c4f..c16a4a6a568 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -122,7 +122,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -144,7 +143,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -170,7 +169,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -189,7 +187,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -214,7 +212,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -234,7 +231,7 @@ jobs: sudo 
rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -260,7 +257,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_ubsan EOF - name: Download changed images @@ -280,7 +276,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -306,7 +302,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -326,7 +321,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -352,7 +347,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_msan EOF - name: Download changed images @@ -372,7 +366,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && 
python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -398,7 +392,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -418,7 +411,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index b73bf057393..ceb1543fa96 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -26,15 +26,6 @@ from tee_popen import TeePopen IMAGE_NAME = "clickhouse/binary-builder" -def get_build_config(build_check_name: str, build_name: str) -> BuildConfig: - if build_check_name == "ClickHouse build check (actions)": - build_config_name = "build_config" - else: - raise Exception(f"Unknown build check name {build_check_name}") - - return CI_CONFIG[build_config_name][build_name] - - def _can_export_binaries(build_config: BuildConfig) -> bool: if build_config["package_type"] != "deb": return False @@ -196,10 +187,9 @@ def upload_master_static_binaries( def main(): logging.basicConfig(level=logging.INFO) - build_check_name = sys.argv[1] - build_name = sys.argv[2] + build_name = sys.argv[1] - build_config = get_build_config(build_check_name, build_name) + build_config = CI_CONFIG["build_config"][build_name] if not os.path.exists(TEMP_PATH): os.makedirs(TEMP_PATH) From d2e5ab0f6c147f16ca36722bc85d48c4336ca640 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 22 Apr 2022 13:37:08 +0200 Subject: [PATCH 204/615] Improve packager script a little bit --- docker/packager/binary/build.sh | 2 ++ docker/packager/packager | 47 +++++++++++++-------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c893263ef09..32891882e27 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -159,3 +159,5 @@ then # files in place, and will fail because this directory is not writable. tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" fi + +ls -l /output diff --git a/docker/packager/packager b/docker/packager/packager index b7ffdd698a4..a8707bae01c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -7,6 +7,7 @@ import logging import sys SCRIPT_PATH = os.path.realpath(__file__) +IMAGE_TYPE = "binary" def check_image_exists_locally(image_name): @@ -75,7 +76,6 @@ def parse_env_variables( compiler, sanitizer, package_type, - image_type, cache, distcc_hosts, split_binary, @@ -153,7 +153,7 @@ def parse_env_variables( cxx = cc.replace("gcc", "g++").replace("clang", "clang++") - if image_type == "deb": + if package_type == "deb": result.append("MAKE_DEB=true") cmake_flags.append("-DENABLE_TESTS=0") cmake_flags.append("-DENABLE_UTILS=0") @@ -258,6 +258,12 @@ def parse_env_variables( return result +def dir_name(name: str) -> str: + if not os.path.isabs(name): + name = os.path.abspath(os.path.join(os.getcwd(), name)) + return name + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser( @@ -273,12 +279,11 @@ if __name__ == "__main__": ) parser.add_argument( "--clickhouse-repo-path", - default=os.path.join( - os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir - ), + default=os.path.join(os.path.dirname(SCRIPT_PATH), os.pardir, os.pardir), + type=dir_name, help="ClickHouse git 
repository", ) - parser.add_argument("--output-dir", required=True) + parser.add_argument("--output-dir", type=dir_name, required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument( "--compiler", @@ -315,6 +320,7 @@ if __name__ == "__main__": parser.add_argument( "--ccache_dir", default=os.getenv("HOME", "") + "/.ccache", + type=dir_name, help="a directory with ccache", ) parser.add_argument("--distcc-hosts", nargs="+") @@ -332,37 +338,23 @@ if __name__ == "__main__": ) args = parser.parse_args() - if not os.path.isabs(args.output_dir): - args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir)) - image_type = ( - "binary" - if args.package_type in ("performance", "coverity") - else args.package_type - ) - image_name = "clickhouse/binary-builder" + image_name = f"clickhouse/{IMAGE_TYPE}-builder" - if not os.path.isabs(args.clickhouse_repo_path): - ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path)) - else: - ch_root = args.clickhouse_repo_path + ch_root = args.clickhouse_repo_path - if args.additional_pkgs and image_type != "deb": + if args.additional_pkgs and args.package_type != "deb": raise Exception("Can build additional packages only in deb build") - if args.with_binaries != "" and image_type != "deb": + if args.with_binaries != "" and args.package_type != "deb": raise Exception("Can add additional binaries only in deb build") - if args.with_binaries != "" and image_type == "deb": + if args.with_binaries != "" and args.package_type == "deb": logging.info("Should place %s to output", args.with_binaries) - dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") + dockerfile = os.path.join(ch_root, "docker/packager", IMAGE_TYPE, "Dockerfile") image_with_version = image_name + ":" + args.docker_image_version - if ( - image_type != "freebsd" - and not check_image_exists_locally(image_name) - or args.force_build_image - ): + if not 
check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_with_version) or args.force_build_image: build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( @@ -370,7 +362,6 @@ if __name__ == "__main__": args.compiler, args.sanitizer, args.package_type, - image_type, args.cache, args.distcc_hosts, args.split_binary, From 96859ec6787b1e20b3022d3d051d2d89023a76fd Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 13 May 2022 12:23:58 +0200 Subject: [PATCH 205/615] Remove unused packager code for deb packages --- docker/images.json | 4 -- docker/packager/binary/build.sh | 7 --- docker/packager/deb/Dockerfile | 81 --------------------------------- docker/packager/deb/build.sh | 58 ----------------------- docker/packager/other/fuzzer.sh | 36 --------------- 5 files changed, 186 deletions(-) delete mode 100644 docker/packager/deb/Dockerfile delete mode 100755 docker/packager/deb/build.sh delete mode 100755 docker/packager/other/fuzzer.sh diff --git a/docker/images.json b/docker/images.json index bdef55e0efc..9b7d44bc990 100644 --- a/docker/images.json +++ b/docker/images.json @@ -1,8 +1,4 @@ { - "docker/packager/deb": { - "name": "clickhouse/deb-builder", - "dependent": [] - }, "docker/packager/binary": { "name": "clickhouse/binary-builder", "dependent": [ diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 32891882e27..3cbddf1d195 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -138,13 +138,6 @@ then mv "coverity-scan.tgz" /output fi -# Also build fuzzers if any sanitizer specified -# if [ -n "$SANITIZER" ] -# then -# # Currently we are in build/build_docker directory -# ../docker/packager/other/fuzzer.sh -# fi - cache_status if [ "${CCACHE_DEBUG:-}" == "1" ] diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile deleted file mode 100644 index 76a5f1d91c0..00000000000 --- a/docker/packager/deb/Dockerfile +++ 
/dev/null @@ -1,81 +0,0 @@ -# rebuild in #33610 -# docker build -t clickhouse/deb-builder . -FROM ubuntu:20.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13 - -RUN apt-get update \ - && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ - --yes --no-install-recommends --verbose-versions \ - && export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \ - && wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \ - && echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \ - && apt-key add /tmp/llvm-snapshot.gpg.key \ - && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ - && echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ - /etc/apt/sources.list - -# initial packages -RUN apt-get update \ - && apt-get install \ - bash \ - fakeroot \ - ccache \ - curl \ - software-properties-common \ - --yes --no-install-recommends - -# Architecture of the image when BuildKit/buildx is used -ARG TARGETARCH - -# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able -# to compress files using pigz (https://zlib.net/pigz/) instead of gzip. 
-# Significantly increase deb packaging speed and compatible with old systems -RUN arch=${TARGETARCH:-amd64} \ - && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} - -RUN apt-get update \ - && apt-get install \ - alien \ - clang-${LLVM_VERSION} \ - clang-tidy-${LLVM_VERSION} \ - cmake \ - debhelper \ - devscripts \ - gdb \ - git \ - gperf \ - lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ - llvm-${LLVM_VERSION}-dev \ - moreutils \ - ninja-build \ - perl \ - pigz \ - pixz \ - pkg-config \ - tzdata \ - --yes --no-install-recommends - -# NOTE: Seems like gcc-11 is too new for ubuntu20 repository -RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ - && apt-get update \ - && apt-get install gcc-11 g++-11 --yes - - -# These symlinks are required: -# /usr/bin/ld.lld: by gcc to find lld compiler -# /usr/bin/aarch64-linux-gnu-obj*: for debug symbols stripping -RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld \ - && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-strip \ - && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-objcopy \ - && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objdump /usr/bin/aarch64-linux-gnu-objdump - - -COPY build.sh / - -CMD ["/bin/bash", "/build.sh"] diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh deleted file mode 100755 index e1272317c8a..00000000000 --- a/docker/packager/deb/build.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -set -x -e - -# Uncomment to debug ccache. 
-# export CCACHE_LOGFILE=/build/ccache.log -# export CCACHE_DEBUG=1 - -ccache --show-config ||: -ccache --show-stats ||: -ccache --zero-stats ||: - -read -ra ALIEN_PKGS <<< "${ALIEN_PKGS:-}" -build/release "${ALIEN_PKGS[@]}" | ts '%Y-%m-%d %H:%M:%S' -mv /*.deb /output -mv -- *.changes /output -mv -- *.buildinfo /output -mv /*.rpm /output ||: # if exists -mv /*.tgz /output ||: # if exists - -if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;} -then - echo "Place $BINARY_OUTPUT to output" - mkdir /output/binary ||: # if exists - mv /build/obj-*/programs/clickhouse* /output/binary - - if [ "$BINARY_OUTPUT" = "tests" ] - then - mv /build/obj-*/src/unit_tests_dbms /output/binary - fi -fi - -# Also build fuzzers if any sanitizer specified -# if [ -n "$SANITIZER" ] -# then -# # Script is supposed that we are in build directory. -# mkdir -p build/build_docker -# cd build/build_docker -# # Launching build script -# ../docker/packager/other/fuzzer.sh -# cd -# fi - -ccache --show-config ||: -ccache --show-stats ||: - -if [ "${CCACHE_DEBUG:-}" == "1" ] -then - find /build -name '*.ccache-*' -print0 \ - | tar -c -I pixz -f /output/ccache-debug.txz --null -T - -fi - -if [ -n "$CCACHE_LOGFILE" ] -then - # Compress the log as well, or else the CI will try to compress all log - # files in place, and will fail because this directory is not writable. - tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" -fi diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh deleted file mode 100755 index ac820d9e689..00000000000 --- a/docker/packager/other/fuzzer.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash - -# This script is responsible for building all fuzzers, and copy them to output directory -# as an archive. -# Script is supposed that we are in build directory. 
- -set -x -e - -printenv - -# Delete previous cache, because we add a new flags -DENABLE_FUZZING=1 and -DFUZZER=libfuzzer -rm -f CMakeCache.txt -read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" -# Hope, that the most part of files will be in cache, so we just link new executables -# Please, add or change flags directly in cmake -cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \ - -DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 "${CMAKE_FLAGS[@]}" .. - -FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ') - -NUM_JOBS=$(($(nproc || grep -c ^processor /proc/cpuinfo))) - -mkdir -p /output/fuzzers -for FUZZER_TARGET in $FUZZER_TARGETS -do - # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. - ninja $NINJA_FLAGS $FUZZER_TARGET -j $NUM_JOBS - # Find this binary in build directory and strip it - FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET") - strip --strip-unneeded "$FUZZER_PATH" - mv "$FUZZER_PATH" /output/fuzzers -done - - -tar -zcvf /output/fuzzers.tar.gz /output/fuzzers -rm -rf /output/fuzzers From 8507a4ddbfe8f34c39b8dbab0f3c73a8ab9c30e4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 13 May 2022 12:50:16 +0200 Subject: [PATCH 206/615] Update nfpm version, prepare UID/GID workdir --- docker/packager/binary/Dockerfile | 5 ++++- docker/packager/binary/build.sh | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index a57a734e3df..d4e824838c2 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -97,12 +97,15 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH -ARG NFPM_VERSION=2.15.0 +ARG NFPM_VERSION=2.15.1 RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ && dpkg -i /tmp/nfpm.deb \ && rm /tmp/nfpm.deb +RUN mkdir /workdir && chmod 777 /workdir +WORKDIR /workdir + COPY build.sh / CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 3cbddf1d195..50839180453 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -1,18 +1,18 @@ #!/usr/bin/env bash +set -x -e exec &> >(ts) -set -x -e cache_status () { ccache --show-config ||: ccache --show-stats ||: } -git config --global --add safe.directory /build +[ -O /build ] || git config --global --add safe.directory /build -mkdir -p build/cmake/toolchain/darwin-x86_64 -tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 -ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 +mkdir -p /build/cmake/toolchain/darwin-x86_64 +tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 +ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 # Uncomment to debug ccache. Don't put ccache log in /output right away, or it # will be confusingly packed into the "performance" package. 
@@ -20,8 +20,8 @@ ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 # export CCACHE_DEBUG=1 -mkdir -p build/build_docker -cd build/build_docker +mkdir -p /build/build_docker +cd /build/build_docker rm -f CMakeCache.txt # Read cmake arguments into array (possibly empty) read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" @@ -61,10 +61,10 @@ fi if [ "coverity" == "$COMBINED_OUTPUT" ] then - mkdir -p /opt/cov-analysis + mkdir -p /workdir/cov-analysis - wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /opt/cov-analysis --strip-components 1 - export PATH=$PATH:/opt/cov-analysis/bin + wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1 + export PATH=$PATH:/workdir/cov-analysis/bin cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" fi From 1f67665c95d9de38f1641e4412c8349aa67344ed Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 13 May 2022 13:36:08 +0200 Subject: [PATCH 207/615] Build performance output in release builds --- .github/workflows/master.yml | 55 ++--------------- .github/workflows/pull_request.yml | 55 ++--------------- docker/packager/binary/build.sh | 61 +++++++++++++------ docker/packager/packager | 10 +-- .../test/performance-comparison/download.sh | 8 ++- .../test/performance-comparison/entrypoint.sh | 16 +++-- tests/ci/ci_config.py | 13 +--- tests/ci/performance_comparison_check.py | 3 + 8 files changed, 73 insertions(+), 148 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6457f777289..0c760335296 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -278,53 +278,6 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderPerformance: - needs: DockerHubPush - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=performance - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - with: - fetch-depth: 0 # is needed for ancestor commit search - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: 
actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -2952,7 +2905,7 @@ jobs: #################################### PERFORMANCE TESTS ###################################### ############################################################################################# PerformanceComparison0: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -2990,7 +2943,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison1: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3028,7 +2981,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison2: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3066,7 +3019,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison3: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a2be00ed9a7..8dcc841bef9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -294,53 +294,6 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" - BuilderPerformance: - needs: [DockerHubPush, FastTest] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - 
IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=performance - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - with: - fetch-depth: 0 # is needed for ancestor commit search - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -3163,7 +3116,7 @@ jobs: #################################### PERFORMANCE TESTS ###################################### ############################################################################################# PerformanceComparison0: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3201,7 +3154,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison1: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3239,7 +3192,7 @@ jobs: docker rm -f 
$(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison2: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3277,7 +3230,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison3: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 50839180453..b8d11e9c293 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -89,16 +89,36 @@ mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so.*' -print -exec mv '{}' /output \; -# Different files for performance test. -if [ "performance" == "$COMBINED_OUTPUT" ] -then - cp -r ../tests/performance /output - cp -r ../tests/config/top_level_domains /output - cp -r ../docker/test/performance-comparison/config /output ||: - rm /output/unit_tests_dbms ||: - rm /output/clickhouse-odbc-bridge ||: +prepare_combined_output () { + local OUTPUT + OUTPUT="$1" - cp -r ../docker/test/performance-comparison /output/scripts ||: + mkdir -p "$OUTPUT"/config + cp /build/programs/server/config.xml "$OUTPUT"/config + cp /build/programs/server/users.xml "$OUTPUT"/config + cp -r --dereference /build/programs/server/config.d "$OUTPUT"/config +} + +# Different files for performance test. 
+if [ "$WITH_PERFORMANCE" == 1 ] +then + PERF_OUTPUT=/workdir/performance/output + mkdir -p "$PERF_OUTPUT" + cp -r ../tests/performance "$PERF_OUTPUT" + cp -r ../tests/config/top_level_domains "$PERF_OUTPUT" + cp -r ../docker/test/performance-comparison/config "$PERF_OUTPUT" ||: + for SRC in /output/clickhouse*; do + # Copy all clickhouse* files except packages and bridges + [[ "$SRC" != *.* ]] && [[ "$SRC" != *-bridge ]] && \ + cp -d "$SRC" "$PERF_OUTPUT" + done + if [ -x "$PERF_OUTPUT"/clickhouse-keeper ]; then + # Replace standalone keeper by symlink + ln -sf clickhouse "$PERF_OUTPUT"/clickhouse-keeper + fi + + cp -r ../docker/test/performance-comparison "$PERF_OUTPUT"/scripts ||: + prepare_combined_output "$PERF_OUTPUT" # We have to know the revision that corresponds to this binary build. # It is not the nominal SHA from pull/*/head, but the pull/*/merge, which is @@ -111,22 +131,23 @@ then # for a given nominal SHA, but it is not accessible outside Yandex. # This is why we add this repository snapshot from CI to the performance test # package. - mkdir /output/ch - git -C /output/ch init --bare - git -C /output/ch remote add origin /build - git -C /output/ch fetch --no-tags --depth 50 origin HEAD:pr - git -C /output/ch fetch --no-tags --depth 50 origin master:master - git -C /output/ch reset --soft pr - git -C /output/ch log -5 + mkdir "$PERF_OUTPUT"/ch + git -C "$PERF_OUTPUT"/ch init --bare + git -C "$PERF_OUTPUT"/ch remote add origin /build + git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin HEAD:pr + git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:master + git -C "$PERF_OUTPUT"/ch reset --soft pr + git -C "$PERF_OUTPUT"/ch log -5 + ( + cd "$PERF_OUTPUT"/.. + tar -cv -I pigz -f /output/performance.tgz output + ) fi # May be set for split build or for performance test. 
if [ "" != "$COMBINED_OUTPUT" ] then - mkdir -p /output/config - cp ../programs/server/config.xml /output/config - cp ../programs/server/users.xml /output/config - cp -r --dereference ../programs/server/config.d /output/config + prepare_combined_output /output tar -cv -I pigz -f "$COMBINED_OUTPUT.tgz" /output rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output diff --git a/docker/packager/packager b/docker/packager/packager index a8707bae01c..b60b0c7bab6 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -165,6 +165,7 @@ def parse_env_variables( cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") if is_release_build(build_type, package_type, sanitizer, split_binary): cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON") + result.append("WITH_PERFORMANCE=1") if is_cross_arm: cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") else: @@ -176,10 +177,7 @@ def parse_env_variables( cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") # Create combined output archive for split build and for performance tests. - if package_type == "performance": - result.append("COMBINED_OUTPUT=performance") - cmake_flags.append("-DENABLE_TESTS=0") - elif package_type == "coverity": + if package_type == "coverity": result.append("COMBINED_OUTPUT=coverity") result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') elif split_binary: @@ -270,11 +268,9 @@ if __name__ == "__main__": formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="ClickHouse building script using prebuilt Docker image", ) - # 'performance' creates a combined .tgz with server - # and configs to be used for performance test. 
parser.add_argument( "--package-type", - choices=["deb", "binary", "performance", "coverity"], + choices=["deb", "binary", "coverity"], required=True, ) parser.add_argument( diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index ae9e677713f..463c08c5304 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -3,6 +3,7 @@ set -ex set -o pipefail trap "exit" INT TERM trap 'kill $(jobs -pr) ||:' EXIT +BUILD_NAME=${BUILD_NAME:-package_release} mkdir db0 ||: mkdir left ||: @@ -26,7 +27,10 @@ function download { # Historically there were various paths for the performance test package. # Test all of them. - declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz") + declare -a urls_to_try=( + "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" + "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz" + ) for path in "${urls_to_try[@]}" do @@ -41,7 +45,7 @@ function download # download anything, for example in some manual runs. In this case, SHAs are not set. if ! 
[ "$left_sha" = "$right_sha" ] then - wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv & + wget -nv -nd -c "$left_path" -O- | tar -C left --no-same-owner --strip-components=1 -zxv & elif [ "$right_sha" != "" ] then mkdir left ||: diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index cc6e55dac1f..37bb86be401 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -5,6 +5,7 @@ CHPC_CHECK_START_TIMESTAMP="$(date +%s)" export CHPC_CHECK_START_TIMESTAMP S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"} +BUILD_NAME=${BUILD_NAME:-package_release} COMMON_BUILD_PREFIX="/clickhouse_build_check" if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then @@ -64,7 +65,12 @@ function find_reference_sha # Historically there were various path for the performance test package, # test all of them. unset found - declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz") + declare -a urls_to_try=( + "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz" + # FIXME: the following link is left there for backward compatibility. + # We should remove it after 2022-11-01 + "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz" + ) for path in "${urls_to_try[@]}" do if curl_with_retry "$path" @@ -88,13 +94,13 @@ chmod 777 workspace output cd workspace # Download the package for the version we are going to test. 
-if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz" +if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tgz" then - right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz" + right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tgz" fi mkdir right -wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv +wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 -zxv # Find reference revision if not specified explicitly if [ "$REF_SHA" == "" ]; then find_reference_sha; fi @@ -155,7 +161,7 @@ ulimit -c unlimited cat /proc/sys/kernel/core_pattern # Start the main comparison script. -{ \ +{ time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \ time stage=configure "$script_path"/compare.sh ; \ } 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 1a070c781d4..33430c11a53 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -18,16 +18,6 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, - "performance": { - "compiler": "clang-13", - "build_type": "", - "sanitizer": "", - "package_type": "performance", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": False, - }, "coverity": { "compiler": "clang-13", "build_type": "", @@ -202,7 +192,6 @@ CI_CONFIG = { "builds_report_config": { "ClickHouse build check (actions)": [ "package_release", - "performance", "coverity", "package_aarch64", "package_asan", @@ -372,7 +361,7 @@ CI_CONFIG = { "required_build": "binary_release", }, "Performance Comparison (actions)": { - "required_build": "performance", + "required_build": "package_release", }, }, } # type: dict diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py 
index aa848b37109..0bba4ea2154 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -12,6 +12,7 @@ import re from github import Github from env_helper import GITHUB_RUN_URL +from ci_config import CI_CONFIG from pr_info import PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -69,6 +70,7 @@ if __name__ == "__main__": reports_path = os.getenv("REPORTS_PATH", "./reports") check_name = sys.argv[1] + required_build = CI_CONFIG["tests_config"][check_name]["required_build"] if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -83,6 +85,7 @@ if __name__ == "__main__": docker_env = "" docker_env += " -e S3_URL=https://s3.amazonaws.com/clickhouse-builds" + docker_env += f" -e BUILD_NAME={required_build}" if pr_info.number == 0: pr_link = commit.html_url From 48751f5a91bc30ec254ffff5fb4b3bd4d4cfee7b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 16 May 2022 13:33:28 +0200 Subject: [PATCH 208/615] Checkout master:master for performance output in packager --- .github/workflows/master.yml | 6 +++--- .github/workflows/pull_request.yml | 6 +++++- docker/packager/packager | 28 ++++++++++++++++++++++++++++ tests/ci/build_check.py | 9 --------- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 0c760335296..2af54da5e16 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -212,7 +212,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 0 # otherwise we will have no info about contributors + fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -256,7 +256,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 0 # otherwise we will have no info about contributors + fetch-depth: 0 # For a proper version and 
performance artifacts - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -938,7 +938,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 0 # otherwise we will have no version info + fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8dcc841bef9..6482ddebe06 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -272,6 +272,8 @@ jobs: sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 + with: + fetch-depth: 0 # for performance artifact - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -407,6 +409,8 @@ jobs: sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 + with: + fetch-depth: 0 # for performance artifact - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -987,7 +991,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 1 # It MUST BE THE SAME for all dependencies and the job itself + fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/docker/packager/packager b/docker/packager/packager index b60b0c7bab6..e197c06b4af 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -5,6 +5,7 @@ import os import argparse import logging import sys +from typing import List SCRIPT_PATH = os.path.realpath(__file__) IMAGE_TYPE = "binary" @@ -39,6 +40,32 @@ def build_image(image_name, filepath): ) +def pre_build(repo_path: str, env_variables: List[str]): + if "WITH_PERFORMANCE=1" in 
env_variables: + current_branch = subprocess.check_output( + "git branch --show-current", shell=True, encoding="utf-8" + ).strip() + is_shallow = ( + subprocess.check_output( + "git rev-parse --is-shallow-repository", shell=True, encoding="utf-8" + ) + == "true\n" + ) + if is_shallow: + # I've spent quite some time on looking around the problem, and my + # conclusion is: in the current state the easiest way to go is to force + # unshallow repository for performance artifacts. + # To change it we need to rework our performance tests docker image + raise Exception("shallow repository is not suitable for performance builds") + if current_branch != "master": + cmd = ( + f"git -C {repo_path} fetch --no-recurse-submodules " + "--no-tags origin master:master" + ) + logging.info("Getting master branch for performance artifact: ''%s'", cmd) + subprocess.check_call(cmd, shell=True) + + def run_docker_image_with_env( image_name, output, env_variables, ch_root, ccache_dir, docker_image_version ): @@ -370,6 +397,7 @@ if __name__ == "__main__": args.with_binaries, ) + pre_build(args.clickhouse_repo_path, env_prepared) run_docker_image_with_env( image_name, args.output_dir, diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index ceb1543fa96..749a416ecad 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -276,15 +276,6 @@ def main(): logging.info("cache was not fetched, will create empty dir") os.makedirs(ccache_path) - if build_config["package_type"] == "performance" and pr_info.number != 0: - # because perf tests store some information about git commits - cmd = ( - f"cd {REPO_COPY} && git fetch --depth=60 --no-recurse-submodules " - "--no-tags origin master:master" - ) - logging.info("Fetch master branch with a command: %s", cmd) - subprocess.check_call(cmd, shell=True) - packager_cmd = get_packager_cmd( build_config, os.path.join(REPO_COPY, "docker/packager"), From ec3246ea4feb717d3c2ec16bd564ac9d8f51bfaf Mon Sep 17 00:00:00 2001 From: "Mikhail 
f. Shiryaev" Date: Mon, 16 May 2022 13:37:51 +0200 Subject: [PATCH 209/615] Run binary-builder as a current user --- docker/packager/packager | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/docker/packager/packager b/docker/packager/packager index e197c06b4af..578b5a38bfb 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -67,7 +67,13 @@ def pre_build(repo_path: str, env_variables: List[str]): def run_docker_image_with_env( - image_name, output, env_variables, ch_root, ccache_dir, docker_image_version + image_name, + as_root, + output, + env_variables, + ch_root, + ccache_dir, + docker_image_version, ): env_part = " -e ".join(env_variables) if env_part: @@ -78,8 +84,13 @@ def run_docker_image_with_env( else: interactive = "" + if as_root: + user = "0:0" + else: + user = f"{os.geteuid()}:{os.getegid()}" + cmd = ( - f"docker run --network=host --rm --volume={output}:/output " + f"docker run --network=host --user={user} --rm --volume={output}:/output " f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} " f"{interactive} {image_name}:{docker_image_version}" ) @@ -359,6 +370,9 @@ if __name__ == "__main__": parser.add_argument( "--docker-image-version", default="latest", help="docker image tag to use" ) + parser.add_argument( + "--as-root", action="store_true", help="if the container should run as root" + ) args = parser.parse_args() @@ -400,6 +414,7 @@ if __name__ == "__main__": pre_build(args.clickhouse_repo_path, env_prepared) run_docker_image_with_env( image_name, + args.as_root, args.output_dir, env_prepared, ch_root, From 33eed2280b86bb8f424f697ddad7c30d7d611652 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 16 May 2022 20:39:10 +0200 Subject: [PATCH 210/615] Fix linter issues --- tests/ci/env_helper.py | 2 +- tests/ci/performance_comparison_check.py | 63 +++++++++++++----------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 6462baad729..c97c6298acc 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -9,7 +9,7 @@ TEMP_PATH = os.getenv("TEMP_PATH", module_dir) CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") -GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") +GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH", "") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 0bba4ea2154..b491c739653 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -8,18 +8,19 @@ import json import subprocess import traceback import re +from typing import Dict from github import Github -from env_helper import GITHUB_RUN_URL -from ci_config import CI_CONFIG -from pr_info import PRInfo -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from docker_pull_helper import get_image_with_version from commit_status_helper import get_commit, post_commit_status -from tee_popen import TeePopen +from ci_config import CI_CONFIG +from docker_pull_helper import get_image_with_version +from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL +from get_robot_token import get_best_robot_token +from pr_info import PRInfo from rerun_helper import RerunHelper +from s3_helper import S3Helper +from tee_popen import TeePopen IMAGE_NAME = "clickhouse/performance-comparison" @@ -34,7 +35,8 @@ def get_run_command( image, ): return ( - 
f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output " + f"docker run --privileged --volume={workspace}:/workspace " + f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " @@ -75,7 +77,7 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv("GITHUB_EVENT_PATH"), "r", encoding="utf-8") as event_file: + with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file: event = json.load(event_file) gh = Github(get_best_robot_token()) @@ -98,9 +100,12 @@ if __name__ == "__main__": ) if "RUN_BY_HASH_TOTAL" in os.environ: - run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL")) - run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM")) - docker_env += f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}" + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "1")) + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "1")) + docker_env += ( + f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total}" + f" -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}" + ) check_name_with_group = ( check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" ) @@ -160,13 +165,12 @@ if __name__ == "__main__": ) s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/" s3_helper = S3Helper("https://s3.amazonaws.com") - for file in paths: + uploaded = {} # type: Dict[str, str] + for name, path in paths.items(): try: - paths[file] = s3_helper.upload_test_report_to_s3( - paths[file], s3_prefix + file - ) + uploaded[name] = s3_helper.upload_test_report_to_s3(path, s3_prefix + name) except Exception: - paths[file] = "" + uploaded[name] = "" traceback.print_exc() # Upload all images and flamegraphs to S3 @@ -181,9 +185,12 @@ if __name__ == "__main__": status = "" message = "" try: 
- report_text = open(os.path.join(result_path, "report.html"), "r").read() - status_match = re.search("", report_text) - message_match = re.search("", report_text) + with open( + os.path.join(result_path, "report.html"), "r", encoding="utf-8" + ) as report_fd: + report_text = report_fd.read() + status_match = re.search("", report_text) + message_match = re.search("", report_text) if status_match: status = status_match.group(1).strip() if message_match: @@ -208,17 +215,17 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL - if paths["runlog.log"]: - report_url = paths["runlog.log"] + if uploaded["runlog.log"]: + report_url = uploaded["runlog.log"] - if paths["compare.log"]: - report_url = paths["compare.log"] + if uploaded["compare.log"]: + report_url = uploaded["compare.log"] - if paths["output.7z"]: - report_url = paths["output.7z"] + if uploaded["output.7z"]: + report_url = uploaded["output.7z"] - if paths["report.html"]: - report_url = paths["report.html"] + if uploaded["report.html"]: + report_url = uploaded["report.html"] post_commit_status( gh, pr_info.sha, check_name_with_group, message, status, report_url From 79c30f5e581fc076ecdbd4b94d276b4544ec57b0 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 17 May 2022 23:56:22 +0200 Subject: [PATCH 211/615] Do not fail if output and workspace are the same --- docker/test/performance-comparison/entrypoint.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 37bb86be401..4b9a66d2195 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -184,4 +184,6 @@ ls -lath report analyze benchmark metrics \ ./*.core.dmp ./*.core -cp compare.log /output +# If the files aren't same, copy it +cmp --silent compare.log /output/compare.log || \ + cp compare.log /output From 0578ef9372c3b03d71a622f59c9ece6bd48067dd Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 18 May 2022 10:26:12 +0800 Subject: [PATCH 212/615] add test case. test_hive_query::test_cache_dir_use --- tests/integration/test_hive_query/test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 374a86d51e8..ed9cbf33913 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -389,3 +389,9 @@ def test_cache_read_bytes(started_cluster): test_passed = True break assert test_passed + +def test_cache_dir_use(started_cluster): + node = started_cluster.instances['h0_0_0'] + result0 = node.exec_in_container(['bash', '-c', 'ls /tmp/clickhouse_local_cache | wc -l']) + result1 = node.exec_in_container(['bash', '-c', 'ls /tmp/clickhouse_local_cache1 | wc -l']) + assert result0 != '0' and result1 != '0' From afed8317854a47cde4b5903b3684ea081980429f Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 18 May 2022 14:07:23 +0800 Subject: [PATCH 213/615] fixed black check --- tests/integration/test_hive_query/test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_hive_query/test.py 
b/tests/integration/test_hive_query/test.py index ed9cbf33913..d156fa34c62 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -392,6 +392,10 @@ def test_cache_read_bytes(started_cluster): def test_cache_dir_use(started_cluster): node = started_cluster.instances['h0_0_0'] - result0 = node.exec_in_container(['bash', '-c', 'ls /tmp/clickhouse_local_cache | wc -l']) - result1 = node.exec_in_container(['bash', '-c', 'ls /tmp/clickhouse_local_cache1 | wc -l']) - assert result0 != '0' and result1 != '0' + result0 = node.exec_in_container( + ["bash", "-c", "ls /tmp/clickhouse_local_cache | wc -l"] + ) + result1 = node.exec_in_container( + ["bash", "-c", "ls /tmp/clickhouse_local_cache1 | wc -l"] + ) + assert result0 != "0" and result1 != "0" From f10249792938cc3d4a6f5d7e07b77d70d1c1050c Mon Sep 17 00:00:00 2001 From: Vxider Date: Wed, 18 May 2022 08:05:06 +0000 Subject: [PATCH 214/615] fix parser with join query --- src/Storages/WindowView/StorageWindowView.cpp | 26 ++++++++----------- src/Storages/WindowView/StorageWindowView.h | 3 ++- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 15bacbfbd08..60259e46733 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -441,7 +441,7 @@ ASTPtr StorageWindowView::getCleanupQuery() ASTPtr function_equal; function_equal = makeASTFunction( "less", - std::make_shared(window_id_name), + std::make_shared(inner_window_id_column_name), std::make_shared(getCleanupBound())); auto alter_query = std::make_shared(); @@ -497,7 +497,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) { /// SELECT * FROM inner_table WHERE window_id_name == w_end /// (because we fire at the end of windows) - filter_function = makeASTFunction("equals", std::make_shared(window_id_name), std::make_shared(watermark)); + 
filter_function = makeASTFunction("equals", std::make_shared(inner_window_id_column_name), std::make_shared(watermark)); } else { @@ -516,7 +516,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) func_array ->arguments->children.push_back(std::make_shared(w_end)); w_end = addTime(w_end, window_kind, -slice_num_units, *time_zone); } - filter_function = makeASTFunction("has", func_array, std::make_shared(window_id_name)); + filter_function = makeASTFunction("has", func_array, std::make_shared(inner_window_id_column_name)); } auto syntax_result = TreeRewriter(getContext()).analyze(filter_function, builder.getHeader().getNamesAndTypesList()); @@ -531,7 +531,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) /// Adding window column DataTypes window_column_type{std::make_shared(), std::make_shared()}; ColumnWithTypeAndName column; - column.name = window_column_name; + column.name = inner_window_column_name; column.type = std::make_shared(std::move(window_column_type)); column.column = column.type->createColumnConst(0, Tuple{w_start, watermark}); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); @@ -544,7 +544,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) /// Removing window id column auto new_header = builder.getHeader(); - new_header.erase(window_id_name); + new_header.erase(inner_window_id_column_name); auto convert_actions_dag = ActionsDAG::makeConvertingActions( builder.getHeader().getColumnsWithTypeAndName(), new_header.getColumnsWithTypeAndName(), @@ -694,14 +694,13 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( auto columns_list = std::make_shared(); - String window_id_column_name; if (is_time_column_func_now) { auto column_window = std::make_shared(); column_window->name = window_id_name; column_window->type = std::make_shared("UInt32"); columns_list->children.push_back(column_window); - window_id_column_name = window_id_name; + inner_window_id_column_name = 
window_id_name; } for (const auto & column : t_sample_block.getColumnsWithTypeAndName()) @@ -713,17 +712,19 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( column_dec->name = column.name; column_dec->type = ast; columns_list->children.push_back(column_dec); - if (!is_time_column_func_now && window_id_column_name.empty() && startsWith(column.name, "windowID")) + if (!is_time_column_func_now && inner_window_id_column_name.empty() && startsWith(column.name, "windowID")) { - window_id_column_name = column.name; + inner_window_id_column_name = column.name; } } - if (window_id_column_name.empty()) + if (inner_window_id_column_name.empty()) throw Exception( "The first argument of time window function should not be a constant value.", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + inner_window_column_name = std::regex_replace(inner_window_id_column_name, std::regex("windowID"), is_tumble ? "tumble" : "hop"); + ToIdentifierMatcher::Data query_data; query_data.window_id_name = window_id_name; query_data.window_id_alias = window_id_alias; @@ -1172,11 +1173,6 @@ StorageWindowView::StorageWindowView( /// Extract information about watermark, lateness. 
eventTimeParser(query); - if (is_tumble) - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "tumble"); - else - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "hop"); - if (attach_) { inner_table_id = StorageID(table_id_.database_name, generateInnerTableName(table_id_)); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 83b8df1a554..da33e750ae5 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -223,7 +223,8 @@ private: Int64 slide_num_units; String window_id_name; String window_id_alias; - String window_column_name; + String inner_window_column_name; + String inner_window_id_column_name; String timestamp_column_name; StorageID select_table_id = StorageID::createEmpty(); From 6ce8947417f0e9bda73411e7cf72d8dea01b7356 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 17 May 2022 22:47:33 +0200 Subject: [PATCH 215/615] GLOB with CONFIGURE_DEPENDS Globbing generally misses to pick up files which were added/deleted after CMake's configure. This is a nuisance but can be alleviated using CONFIGURE_DEPENDS (available since CMake 3.12) which adds a check for new/deleted files before each compile and - if necessary - restarts the configuration. On my system, the check takes < 0.1 sec. (Side note: CONFIGURE_DEPENDS is not guaranteed to work across all generators, but at least it works for Ninja which everyone @CH seems to use.) 
--- cmake/dbms_glob_sources.cmake | 2 +- src/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 0f5c6106b70..01c4a8b16e9 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -1,5 +1,5 @@ macro(add_glob cur_list) - file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN}) + file(GLOB __tmp CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN}) list(APPEND ${cur_list} ${__tmp}) endmacro() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 20db948abd0..1cbcd927216 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -560,7 +560,7 @@ include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") if (ENABLE_TESTS) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories - file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") + file(GLOB_RECURSE "${DST_VAR}" CONFIGURE_DEPENDS RELATIVE "${BASE_DIR}" "gtest*.cpp") endmacro() # attach all dbms gtest sources From 9ed7f2109fe8153f6a93dba81239cf4f30248406 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 18 May 2022 17:02:42 +0800 Subject: [PATCH 216/615] fixed black check --- tests/integration/test_hive_query/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index d156fa34c62..8c37fd81e7c 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -391,7 +391,7 @@ def test_cache_read_bytes(started_cluster): assert test_passed def test_cache_dir_use(started_cluster): - node = started_cluster.instances['h0_0_0'] + node = started_cluster.instances["h0_0_0"] result0 = node.exec_in_container( ["bash", "-c", "ls /tmp/clickhouse_local_cache | wc -l"] ) From 12010a81b713062158b68d4e7b34af01ea77c0e3 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 18 May 2022 09:25:26 +0000 
Subject: [PATCH 217/615] Make better --- docs/en/interfaces/formats.md | 257 ++--- src/Core/BlockInfo.cpp | 3 +- src/Core/BlockInfo.h | 2 + src/Formats/EscapingRuleUtils.cpp | 2 +- src/Formats/JSONUtils.cpp | 994 +++++++++--------- src/Formats/JSONUtils.h | 132 +-- .../Impl/JSONAsStringRowInputFormat.cpp | 8 +- .../Impl/JSONColumnsBaseBlockInputFormat.cpp | 6 +- .../Impl/JSONColumnsBaseBlockOutputFormat.cpp | 6 +- .../Impl/JSONColumnsBlockOutputFormat.cpp | 6 +- ...ONColumnsWithMetadataBlockOutputFormat.cpp | 38 +- .../JSONCompactColumnsBlockOutputFormat.cpp | 6 +- .../Impl/JSONCompactEachRowRowInputFormat.cpp | 8 +- .../Impl/JSONCompactRowOutputFormat.cpp | 22 +- .../Impl/JSONEachRowRowInputFormat.cpp | 22 +- .../Formats/Impl/JSONRowOutputFormat.cpp | 46 +- 16 files changed, 802 insertions(+), 756 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 0bea523bf7f..c248c6644cb 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -403,6 +403,8 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. +For input format columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. + ## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). 
@@ -462,15 +464,15 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], @@ -478,25 +480,32 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "data": [ { - "'hello'": "hello", - "multiply(42, number)": "0", - "range(5)": [0,1,2,3,4] + "num": 42, + "str": "hello", + "arr": [0,1] }, { - "'hello'": "hello", - "multiply(42, number)": "42", - "range(5)": [0,1,2,3,4] + "num": 43, + "str": "hello", + "arr": [0,1,2] }, { - "'hello'": "hello", - "multiply(42, number)": "84", - "range(5)": [0,1,2,3,4] + "num": 44, + "str": "hello", + "arr": [0,1,2,3] } ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001137687, + "rows_read": 3, + "bytes_read": 24 + } } ``` @@ -531,15 +540,15 @@ Example: "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], @@ -547,100 +556,94 @@ Example: "data": [ { - "'hello'": "hello", - "multiply(42, number)": "0", - "range(5)": "[0,1,2,3,4]" + "num": "42", + "str": "hello", + "arr": "[0,1]" }, { - "'hello'": "hello", - "multiply(42, number)": "42", - "range(5)": "[0,1,2,3,4]" + "num": "43", + "str": "hello", + "arr": "[0,1,2]" }, { - "'hello'": "hello", - "multiply(42, number)": "84", - "range(5)": "[0,1,2,3,4]" + "num": "44", + "str": "hello", + "arr": "[0,1,2,3]" } ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001403233, + "rows_read": 3, + "bytes_read": 24 + } } ``` ## JSONColumns {#jsoncolumns} In this format, all 
data is represented as a single JSON Object. -Note that JSONColumns output format buffers all data in memory to output it as a single block. +Note that JSONColumns output format buffers all data in memory to output it as a single block and it can lead to high memory consumption. Example: ```json { - "name1": [1, 2, 3, 4], - "name2": ["Hello", ",", "world", "!"], - "name3": [[1, 2], [3, 4], [5, 6], [7, 8]] + "num": [42, 43, 44], + "str": ["hello", "hello", "hello"], + "arr": [[0,1], [0,1,2], [0,1,2,3]] } ``` Columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. -Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} -Differs from JSON output format in that it outputs columns as in JSONColumns format. -This format buffers all data in memory and then outputs them as a single block. +Differs from JSONColumns output format in that it also outputs some metadata and statistics (similar to JSON output format). +This format buffers all data in memory and then outputs them as a single block, so, it can lead to high memory consumption. 
Example: ```json { - "meta": - [ - { - "name": "sum", - "type": "UInt64" - }, - { - "name": "avg", - "type": "Float64" - } - ], + "meta": + [ + { + "name": "num", + "type": "Int32" + }, + { + "name": "str", + "type": "String" + }, + { + "name": "arr", + "type": "Array(UInt8)" + } + ], - "data": - { - "sum": ["1", "2", "3", "4"], - "avg": [1, 2, 3, 2] - }, + "data": + { + "num": [42, 43, 44], + "str": ["hello", "hello", "hello"], + "arr": [[0,1], [0,1,2], [0,1,2,3]] + }, - "totals": - { - "sum": "10", - "avg": 2 - }, + "rows": 3, - "extremes": - { - "min": - { - "sum": "1", - "avg": 1 - }, - "max": - { - "sum": "4", - "avg": 3 - } - }, + "rows_before_limit_at_least": 3, - "rows": 4, - - "statistics": - { - "elapsed": 0.003701718, - "rows_read": 5, - "bytes_read": 20 - } + "statistics": + { + "elapsed": 0.000272376, + "rows_read": 3, + "bytes_read": 24 + } } ``` @@ -696,87 +699,101 @@ Result: Differs from JSON only in that data rows are output in arrays, not in objects. -Example: +Examples: -``` -// JSONCompact +1) JSONCompact: +```json { "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], "data": [ - ["hello", "0", [0,1,2,3,4]], - ["hello", "42", [0,1,2,3,4]], - ["hello", "84", [0,1,2,3,4]] + [42, "hello", [0,1]], + [43, "hello", [0,1,2]], + [44, "hello", [0,1,2,3]] ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001222069, + "rows_read": 3, + "bytes_read": 24 + } } ``` -``` -// JSONCompactStrings +2) JSONCompactStrings +```json { "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], "data": [ - ["hello", "0", 
"[0,1,2,3,4]"], - ["hello", "42", "[0,1,2,3,4]"], - ["hello", "84", "[0,1,2,3,4]"] + ["42", "hello", "[0,1]"], + ["43", "hello", "[0,1,2]"], + ["44", "hello", "[0,1,2,3]"] ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001572097, + "rows_read": 3, + "bytes_read": 24 + } } ``` ## JSONCompactColumns {#jsoncompactcolumns} In this format, all data is represented as a single JSON Array. -Note that JSONCompactColumns output format buffers all data in memory to output it as a single block. +Note that JSONCompactColumns output format buffers all data in memory to output it as a single block and it can lead to high memory consumption Example: ```json [ - [1, 2, 3, 4], - ["Hello", ",", "world", "!"], - [[1, 2], [3, 4], [5, 6], [7, 8]] + [42, 43, 44], + ["hello", "hello", "hello"], + [[0,1], [0,1,2], [0,1,2,3]] ] ``` -Columns that are not presente in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} ## JSONStringsEachRow {#jsonstringseachrow} @@ -793,15 +810,17 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite When inserting the data, you should provide a separate JSON value for each row. +In JSONEachRow/JSONStringsEachRow input formats columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. 
+ ## JSONEachRowWithProgress {#jsoneachrowwithprogress} ## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress} Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values. ```json -{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} -{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}} -{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}} +{"row":{"num":42,"str":"hello","arr":[0,1]}} +{"row":{"num":43,"str":"hello","arr":[0,1,2]}} +{"row":{"num":44,"str":"hello","arr":[0,1,2,3]}} {"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}} ``` @@ -822,11 +841,11 @@ Differs from `JSONCompactStringsEachRow` in that in that it also prints the head Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). 
```json -["'hello'", "multiply(42, number)", "range(5)"] -["String", "UInt64", "Array(UInt8)"] -["hello", "0", [0,1,2,3,4]] -["hello", "42", [0,1,2,3,4]] -["hello", "84", [0,1,2,3,4]] +["num", "str", "arr"] +["Int32", "String", "Array(UInt8)"] +[42, "hello", [0,1]] +[43, "hello", [0,1,2]] +[44, "hello", [0,1,2,3]] ``` ### Inserting Data {#inserting-data} diff --git a/src/Core/BlockInfo.cpp b/src/Core/BlockInfo.cpp index ae32e8aa579..81064dec733 100644 --- a/src/Core/BlockInfo.cpp +++ b/src/Core/BlockInfo.cpp @@ -68,7 +68,8 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) void BlockMissingValues::setBits(size_t column_idx, size_t rows) { RowsBitMask & mask = rows_mask_by_column_id[column_idx]; - mask.resize(rows, true); + mask.resize(rows); + std::fill(mask.begin(), mask.end(), true); } const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const diff --git a/src/Core/BlockInfo.h b/src/Core/BlockInfo.h index 70cd3bb2221..d431303ca39 100644 --- a/src/Core/BlockInfo.h +++ b/src/Core/BlockInfo.h @@ -56,7 +56,9 @@ public: const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; /// Check that we have to replace default value at least in one of columns bool hasDefaultBits(size_t column_idx) const; + /// Set bit for a specified row in a single column. void setBit(size_t column_idx, size_t row_idx); + /// Set bits for all rows in a single column. void setBits(size_t column_idx, size_t rows); bool empty() const { return rows_mask_by_column_id.empty(); } size_t size() const { return rows_mask_by_column_id.size(); } diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 146043456bd..e4b655cdcf9 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -452,7 +452,7 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe return buf.eof() ? 
type : nullptr; } case FormatSettings::EscapingRule::JSON: - return getDataTypeFromJSONField(field); + return JSONUtils::getDataTypeFromField(field); case FormatSettings::EscapingRule::CSV: { if (!format_settings.csv.input_format_use_best_effort_in_schema_inference) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index eb9a78ad734..8ead3e99e46 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -26,566 +26,578 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -static std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) +namespace JSONUtils { - skipWhitespaceIfAny(in); - char * pos = in.position(); - size_t balance = 0; - bool quotes = false; - size_t number_of_rows = 0; - - while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) + template + static std::pair + fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) { - const auto current_object_size = memory.size() + static_cast(pos - in.position()); - if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size) - throw ParsingException("Size of JSON object is extremely large. Expected not greater than " + - std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) + - " bytes per row. 
Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA); + skipWhitespaceIfAny(in); - if (quotes) + char * pos = in.position(); + size_t balance = 0; + bool quotes = false; + size_t number_of_rows = 0; + + while (loadAtPosition(in, memory, pos) + && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) { - pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); + const auto current_object_size = memory.size() + static_cast(pos - in.position()); + if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size) + throw ParsingException( + "Size of JSON object is extremely large. Expected not greater than " + std::to_string(min_chunk_size) + + " bytes, but current is " + std::to_string(current_object_size) + + " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", + ErrorCodes::INCORRECT_DATA); - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - if (*pos == '\\') + if (quotes) { - ++pos; - if (loadAtPosition(in, memory, pos)) + pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); + + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) + continue; + + if (*pos == '\\') + { ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '"') + { + ++pos; + quotes = false; + } } - else if (*pos == '"') + else { - ++pos; - quotes = false; + pos = find_first_symbols(pos, in.buffer().end()); + + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. 
There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) + continue; + + else if (*pos == opening_bracket) + { + ++balance; + ++pos; + } + else if (*pos == closing_bracket) + { + --balance; + ++pos; + } + else if (*pos == '\\') + { + ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '"') + { + quotes = true; + ++pos; + } + + if (balance == 0) + ++number_of_rows; } } - else - { - pos = find_first_symbols(pos, in.buffer().end()); - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - else if (*pos == opening_bracket) - { - ++balance; - ++pos; - } - else if (*pos == closing_bracket) - { - --balance; - ++pos; - } - else if (*pos == '\\') - { - ++pos; - if (loadAtPosition(in, memory, pos)) - ++pos; - } - else if (*pos == '"') - { - quotes = true; - ++pos; - } - - if (balance == 0) - ++number_of_rows; - } + saveUpToPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; } - saveUpToPosition(in, memory, pos); - return {loadAtPosition(in, memory, pos), number_of_rows}; -} - -template -static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) -{ - Memory memory; - fileSegmentationEngineJSONEachRowImpl(in, memory, 0, 1); - return String(memory.data(), memory.size()); -} - -template -DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field) -{ - if (field.isNull()) - return nullptr; - - if (field.isBool()) - return DataTypeFactory::instance().get("Nullable(Bool)"); - - if (field.isInt64() || field.isUInt64() || field.isDouble()) - return makeNullable(std::make_shared()); - - if (field.isString()) - return makeNullable(std::make_shared()); - - if (field.isArray()) + template + static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) { - auto array = field.getArray(); + Memory memory; + fileSegmentationEngineJSONEachRowImpl(in, memory, 
0, 1); + return String(memory.data(), memory.size()); + } - /// Return nullptr in case of empty array because we cannot determine nested type. - if (array.size() == 0) + template + DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field) + { + if (field.isNull()) return nullptr; - DataTypes nested_data_types; - /// If this array contains fields with different types we will treat it as Tuple. - bool is_tuple = false; - for (const auto element : array) + if (field.isBool()) + return DataTypeFactory::instance().get("Nullable(Bool)"); + + if (field.isInt64() || field.isUInt64() || field.isDouble()) + return makeNullable(std::make_shared()); + + if (field.isString()) + return makeNullable(std::make_shared()); + + if (field.isArray()) { - auto type = getDataTypeFromJSONFieldImpl(element); - if (!type) + auto array = field.getArray(); + + /// Return nullptr in case of empty array because we cannot determine nested type. + if (array.size() == 0) return nullptr; - if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName()) - is_tuple = true; + DataTypes nested_data_types; + /// If this array contains fields with different types we will treat it as Tuple. 
+ bool is_tuple = false; + for (const auto element : array) + { + auto type = getDataTypeFromJSONFieldImpl(element); + if (!type) + return nullptr; - nested_data_types.push_back(std::move(type)); + if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName()) + is_tuple = true; + + nested_data_types.push_back(std::move(type)); + } + + if (is_tuple) + return std::make_shared(nested_data_types); + + return std::make_shared(nested_data_types.back()); } - if (is_tuple) - return std::make_shared(nested_data_types); + if (field.isObject()) + { + auto object = field.getObject(); + DataTypePtr value_type; + bool is_object = false; + for (const auto key_value_pair : object) + { + auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second); + if (!type) + continue; - return std::make_shared(nested_data_types.back()); + if (isObject(type)) + { + is_object = true; + break; + } + + if (!value_type) + { + value_type = type; + } + else if (!value_type->equals(*type)) + { + is_object = true; + break; + } + } + + if (is_object) + return std::make_shared("json", true); + + if (value_type) + return std::make_shared(std::make_shared(), value_type); + + return nullptr; + } + + throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; } - if (field.isObject()) + auto getJSONParserAndElement() { - auto object = field.getObject(); - DataTypePtr value_type; - bool is_object = false; - for (const auto key_value_pair : object) +#if USE_SIMDJSON + return std::pair(); +#elif USE_RAPIDJSON + return std::pair(); +#else + return std::pair(); +#endif + } + + DataTypePtr getDataTypeFromJSONField(const String & field) + { + auto [parser, element] = getJSONParserAndElement(); + bool parsed = parser.parse(field, element); + if (!parsed) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); + + return getDataTypeFromJSONFieldImpl(element); + } + + template + static DataTypes 
determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor) + { + String line = readJSONEachRowLineIntoStringImpl(in); + auto [parser, element] = getJSONParserAndElement(); + bool parsed = parser.parse(line, element); + if (!parsed) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); + + auto fields = extractor.extract(element); + + DataTypes data_types; + data_types.reserve(fields.size()); + for (const auto & field : fields) + data_types.push_back(getDataTypeFromJSONFieldImpl(field)); + + /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. + /// Should we try to parse data inside strings somehow in this case? + + return data_types; + } + + std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) + { + return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); + } + + std::pair + fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) + { + return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); + } + + struct JSONEachRowFieldsExtractor + { + template + std::vector extract(const Element & element) { - auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second); - if (!type) - continue; + /// {..., "" : , ...} - if (isObject(type)) + if (!element.isObject()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); + + auto object = element.getObject(); + std::vector fields; + fields.reserve(object.size()); + column_names.reserve(object.size()); + for (const auto & key_value_pair : object) { - is_object = true; - break; + column_names.emplace_back(key_value_pair.first); + fields.push_back(key_value_pair.second); } - if (!value_type) + return fields; + } + + std::vector column_names; + }; + + NamesAndTypesList 
readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings) + { + JSONEachRowFieldsExtractor extractor; + auto data_types + = determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); + NamesAndTypesList result; + for (size_t i = 0; i != extractor.column_names.size(); ++i) + result.emplace_back(extractor.column_names[i], data_types[i]); + return result; + } + + struct JSONCompactEachRowFieldsExtractor + { + template + std::vector extract(const Element & element) + { + /// [..., , ...] + if (!element.isArray()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); + + auto array = element.getArray(); + std::vector fields; + fields.reserve(array.size()); + for (size_t i = 0; i != array.size(); ++i) + fields.push_back(array[i]); + return fields; + } + }; + + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings) + { + JSONCompactEachRowFieldsExtractor extractor; + return determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); + } + + + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) + { + /// For JSONEachRow we can safely skip whitespace characters + skipWhitespaceIfAny(buf); + return buf.eof() || *buf.position() == '['; + } + + bool readField( + ReadBuffer & in, + IColumn & column, + const DataTypePtr & type, + const SerializationPtr & serialization, + const String & column_name, + const FormatSettings & format_settings, + bool yield_strings) + { + try + { + bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + + if (yield_strings) { - value_type = type; + String str; + readJSONString(str, in); + + ReadBufferFromString buf(str); + + if (as_nullable) + return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); + + serialization->deserializeWholeText(column, buf, format_settings); + return true; } - else if 
(!value_type->equals(*type)) + + if (as_nullable) + return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); + + serialization->deserializeTextJSON(column, in, format_settings); + return true; + } + catch (Exception & e) + { + e.addMessage("(while reading the value of key " + column_name + ")"); + throw; + } + } + + DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers) + { + if (allow_bools_as_numbers) + { + auto not_nullable_first = removeNullable(first); + auto not_nullable_second = removeNullable(second); + /// Check if we have Bool and Number and if so make the result type Number + bool bool_type_presents = isBool(not_nullable_first) || isBool(not_nullable_second); + bool number_type_presents = isNumber(not_nullable_first) || isNumber(not_nullable_second); + if (bool_type_presents && number_type_presents) { - is_object = true; - break; + if (isBool(not_nullable_first)) + return second; + return first; } } - if (is_object) - return std::make_shared("json", true); + /// If we have Map and Object, make result type Object + bool object_type_presents = isObject(first) || isObject(second); + bool map_type_presents = isMap(first) || isMap(second); + if (object_type_presents && map_type_presents) + { + if (isObject(first)) + return first; + return second; + } - if (value_type) - return std::make_shared(std::make_shared(), value_type); + /// If we have different Maps, make result type Object + if (isMap(first) && isMap(second) && !first->equals(*second)) + return std::make_shared("json", true); return nullptr; } - throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; -} - -auto getJSONParserAndElement() -{ -#if USE_SIMDJSON - return std::pair(); -#elif USE_RAPIDJSON - return std::pair(); -#else - return std::pair(); -#endif -} - -DataTypePtr getDataTypeFromJSONField(const String & field) -{ - auto [parser, element] = getJSONParserAndElement(); - 
bool parsed = parser.parse(field, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); - - return getDataTypeFromJSONFieldImpl(element); -} - -template -static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor) -{ - String line = readJSONEachRowLineIntoStringImpl(in); - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(line, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); - - auto fields = extractor.extract(element); - - DataTypes data_types; - data_types.reserve(fields.size()); - for (const auto & field : fields) - data_types.push_back(getDataTypeFromJSONFieldImpl(field)); - - /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. - /// Should we try to parse data inside strings somehow in this case? - - return data_types; -} - -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -{ - return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); -} - -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) -{ - return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); -} - -struct JSONEachRowFieldsExtractor -{ - template - std::vector extract(const Element & element) + void writeFieldDelimiter(WriteBuffer & out, size_t new_lines) { - /// {..., "" : , ...} - - if (!element.isObject()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); - - auto object = element.getObject(); - std::vector fields; - fields.reserve(object.size()); - column_names.reserve(object.size()); - for (const auto & key_value_pair : object) - { - column_names.emplace_back(key_value_pair.first); - 
fields.push_back(key_value_pair.second); - } - - return fields; + writeChar(',', out); + writeChar('\n', new_lines, out); } - std::vector column_names; -}; + void writeFieldCompactDelimiter(WriteBuffer & out) { writeCString(", ", out); } -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings) -{ - JSONEachRowFieldsExtractor extractor; - auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); - NamesAndTypesList result; - for (size_t i = 0; i != extractor.column_names.size(); ++i) - result.emplace_back(extractor.column_names[i], data_types[i]); - return result; -} - -struct JSONCompactEachRowFieldsExtractor -{ - template - std::vector extract(const Element & element) + template + void writeTitle(const char * title, WriteBuffer & out, size_t indent) { - /// [..., , ...] - if (!element.isArray()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); - - auto array = element.getArray(); - std::vector fields; - fields.reserve(array.size()); - for (size_t i = 0; i != array.size(); ++i) - fields.push_back(array[i]); - return fields; + writeChar('\t', indent, out); + writeChar('"', out); + writeCString(title, out); + if constexpr (with_space) + writeCString("\": ", out); + else + writeCString("\":\n", out); } -}; -DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings) -{ - JSONCompactEachRowFieldsExtractor extractor; - return determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); -} - - -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) -{ - /// For JSONEachRow we can safely skip whitespace characters - skipWhitespaceIfAny(buf); - return buf.eof() || *buf.position() == '['; -} - -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings) 
-{ - try + void writeObjectStart(WriteBuffer & out, size_t indent, const char * title) { - bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + if (title) + writeTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("{\n", out); + } + + void writeObjectEnd(WriteBuffer & out, size_t indent) + { + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar('}', out); + } + + void writeArrayStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("[\n", out); + } + + void writeCompactArrayStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + else + writeChar('\t', indent, out); + writeCString("[", out); + } + + void writeArrayEnd(WriteBuffer & out, size_t indent) + { + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar(']', out); + } + + void writeCompactArrayEnd(WriteBuffer & out) { writeChar(']', out); } + + void writeFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name, + size_t indent) + { + if (name.has_value()) + writeTitle(name->data(), out, indent); if (yield_strings) { - String str; - readJSONString(str, in); + WriteBufferFromOwnString buf; - ReadBufferFromString buf(str); - - if (as_nullable) - return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); - - serialization->deserializeWholeText(column, buf, format_settings); - return true; + serialization.serializeText(column, row_num, buf, settings); + writeJSONString(buf.str(), out, settings); } - - if (as_nullable) - return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); - - 
serialization->deserializeTextJSON(column, in, format_settings); - return true; + else + serialization.serializeTextJSON(column, row_num, out, settings); } - catch (Exception & e) - { - e.addMessage("(while reading the value of key " + column_name + ")"); - throw; - } -} -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers) -{ - if (allow_bools_as_numbers) + void writeColumns( + const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent) { - auto not_nullable_first = removeNullable(first); - auto not_nullable_second = removeNullable(second); - /// Check if we have Bool and Number and if so make the result type Number - bool bool_type_presents = isBool(not_nullable_first) || isBool(not_nullable_second); - bool number_type_presents = isNumber(not_nullable_first) || isNumber(not_nullable_second); - if (bool_type_presents && number_type_presents) + for (size_t i = 0; i < columns.size(); ++i) { - if (isBool(not_nullable_first)) - return second; - return first; + if (i != 0) + writeFieldDelimiter(out); + writeFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out, fields[i].name, indent); } } - /// If we have Map and Object, make result type Object - bool object_type_presents = isObject(first) || isObject(second); - bool map_type_presents = isMap(first) || isMap(second); - if (object_type_presents && map_type_presents) + void writeCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out) { - if (isObject(first)) - return first; - return second; - } - - /// If we have different Maps, make result type Object - if (isMap(first) && isMap(second) && !first->equals(*second)) - return std::make_shared("json", 
true); - - return nullptr; -} - -void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines) -{ - writeChar(',', out); - writeChar('\n', new_lines, out); -} - -void writeJSONFieldCompactDelimiter(WriteBuffer & out) -{ - writeCString(", ", out); -} - -template -void writeJSONTitle(const char * title, WriteBuffer & out, size_t indent) -{ - writeChar('\t', indent, out); - writeChar('"', out); - writeCString(title, out); - if constexpr (with_space) - writeCString("\": ", out); - else - writeCString("\":\n", out); -} - -void writeJSONObjectStart(WriteBuffer & out, size_t indent, const char * title) -{ - if (title) - writeJSONTitle(title, out, indent); - writeChar('\t', indent, out); - writeCString("{\n", out); -} - -void writeJSONObjectEnd(WriteBuffer & out, size_t indent) -{ - writeChar('\n', out); - writeChar('\t', indent, out); - writeChar('}', out); -} - -void writeJSONArrayStart(WriteBuffer & out, size_t indent, const char * title) -{ - if (title) - writeJSONTitle(title, out, indent); - writeChar('\t', indent, out); - writeCString("[\n", out); -} - -void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent, const char * title) -{ - if (title) - writeJSONTitle(title, out, indent); - else - writeChar('\t', indent, out); - writeCString("[", out); -} - -void writeJSONArrayEnd(WriteBuffer & out, size_t indent) -{ - writeChar('\n', out); - writeChar('\t', indent, out); - writeChar(']', out); -} - -void writeJSONCompactArrayEnd(WriteBuffer & out) -{ - writeChar(']', out); -} - -void writeJSONFieldFromColumn( - const IColumn & column, - const ISerialization & serialization, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - const std::optional & name, - size_t indent) -{ - if (name.has_value()) - writeJSONTitle(name->data(), out, indent); - - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), out, settings); - } - 
else - serialization.serializeTextJSON(column, row_num, out, settings); -} - -void writeJSONColumns( - const Columns & columns, - const NamesAndTypes & fields, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - size_t indent) -{ - for (size_t i = 0; i < columns.size(); ++i) - { - if (i != 0) - writeJSONFieldDelimiter(out); - writeJSONFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out, fields[i].name, indent); - } -} - -void writeJSONCompactColumns( - const Columns & columns, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out) -{ - for (size_t i = 0; i < columns.size(); ++i) - { - if (i != 0) - writeJSONFieldCompactDelimiter(out); - writeJSONFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out); - } -} - -void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out) -{ - writeJSONArrayStart(out, 1, "meta"); - - for (size_t i = 0; i < fields.size(); ++i) - { - writeJSONObjectStart(out, 2); - - writeJSONTitle("name", out, 3); - writeDoubleQuoted(fields[i].name, out); - writeJSONFieldDelimiter(out); - writeJSONTitle("type", out, 3); - writeJSONString(fields[i].type->getName(), out, settings); - writeJSONObjectEnd(out, 2); - - if (i + 1 < fields.size()) - writeJSONFieldDelimiter(out); - } - - writeJSONArrayEnd(out, 1); -} - -void writeJSONAdditionalInfo( - size_t rows, - size_t rows_before_limit, - bool applied_limit, - const Stopwatch & watch, - const Progress & progress, - bool write_statistics, - WriteBuffer & out) -{ - writeJSONFieldDelimiter(out, 2); - writeJSONTitle("rows", out, 1); - writeIntText(rows, out); - - if (applied_limit) - { - writeJSONFieldDelimiter(out, 2); - writeJSONTitle("rows_before_limit_at_least", out, 1); - writeIntText(rows_before_limit, out); - } - - if 
(write_statistics) - { - writeJSONFieldDelimiter(out, 2); - writeJSONObjectStart(out, 1, "statistics"); - - writeJSONTitle("elapsed", out, 2); - writeText(watch.elapsedSeconds(), out); - writeJSONFieldDelimiter(out); - - writeJSONTitle("rows_read", out, 2); - writeText(progress.read_rows.load(), out); - writeJSONFieldDelimiter(out); - - writeJSONTitle("bytes_read", out, 2); - writeText(progress.read_bytes.load(), out); - - writeJSONObjectEnd(out, 1); - } -} - -void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8) -{ - for (auto & field : fields) - { - if (!field.type->textCanContainOnlyValidUTF8()) - need_validate_utf8 = true; - - WriteBufferFromOwnString buf; + for (size_t i = 0; i < columns.size(); ++i) { - WriteBufferValidUTF8 validating_buf(buf); - writeJSONString(field.name, validating_buf, settings); + if (i != 0) + writeFieldCompactDelimiter(out); + writeFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out); } - field.name = buf.str().substr(1, buf.str().size() - 2); } + + void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out) + { + writeArrayStart(out, 1, "meta"); + + for (size_t i = 0; i < fields.size(); ++i) + { + writeObjectStart(out, 2); + + writeTitle("name", out, 3); + writeDoubleQuoted(fields[i].name, out); + writeFieldDelimiter(out); + writeTitle("type", out, 3); + writeJSONString(fields[i].type->getName(), out, settings); + writeObjectEnd(out, 2); + + if (i + 1 < fields.size()) + writeFieldDelimiter(out); + } + + writeArrayEnd(out, 1); + } + + void writeAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out) + { + writeFieldDelimiter(out, 2); + writeTitle("rows", out, 1); + writeIntText(rows, out); + + if (applied_limit) + { + writeFieldDelimiter(out, 2); + 
writeTitle("rows_before_limit_at_least", out, 1); + writeIntText(rows_before_limit, out); + } + + if (write_statistics) + { + writeFieldDelimiter(out, 2); + writeObjectStart(out, 1, "statistics"); + + writeTitle("elapsed", out, 2); + writeText(watch.elapsedSeconds(), out); + writeFieldDelimiter(out); + + writeTitle("rows_read", out, 2); + writeText(progress.read_rows.load(), out); + writeFieldDelimiter(out); + + writeTitle("bytes_read", out, 2); + writeText(progress.read_bytes.load(), out); + + writeObjectEnd(out, 1); + } + } + + void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8) + { + for (auto & field : fields) + { + if (!field.type->textCanContainOnlyValidUTF8()) + need_validate_utf8 = true; + + WriteBufferFromOwnString buf; + { + WriteBufferValidUTF8 validating_buf(buf); + writeJSONString(field.name, validating_buf, settings); + } + field.name = buf.str().substr(1, buf.str().size() - 2); + } + } + } } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index b4b34498311..f2aba3cbcb5 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -13,85 +13,97 @@ namespace DB { -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); +namespace JSONUtils +{ + std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); + std::pair + fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); + /// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. + /// JSON array with different nested types is treated as Tuple. + /// If cannot convert (for example when field contains null), return nullptr. 
+ DataTypePtr getDataTypeFromField(const String & field); -/// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. -/// JSON array with different nested types is treated as Tuple. -/// If cannot convert (for example when field contains null), return nullptr. -DataTypePtr getDataTypeFromJSONField(const String & field); + /// Read row in JSONEachRow format and try to determine type for each field. + /// Return list of names and types. + /// If cannot determine the type of some field, return nullptr for it. + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); -/// Read row in JSONEachRow format and try to determine type for each field. -/// Return list of names and types. -/// If cannot determine the type of some field, return nullptr for it. -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); + /// Read row in JSONCompactEachRow format and try to determine type for each field. + /// If cannot determine the type of some field, return nullptr for it. + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); -/// Read row in JSONCompactEachRow format and try to determine type for each field. -/// If cannot determine the type of some field, return nullptr for it. 
-DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); + bool readField( + ReadBuffer & in, + IColumn & column, + const DataTypePtr & type, + const SerializationPtr & serialization, + const String & column_name, + const FormatSettings & format_settings, + bool yield_strings); -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings); + DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); + void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8); -void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8); + /// Functions helpers for writing JSON data to WriteBuffer. + void writeFieldDelimiter(WriteBuffer & out, size_t new_lines = 1); -/// Functions helpers for writing JSON data to WriteBuffer. 
+ void writeFieldCompactDelimiter(WriteBuffer & out); -void writeJSONFieldDelimiter(WriteBuffer & out, size_t new_lines = 1); + void writeObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); -void writeJSONFieldCompactDelimiter(WriteBuffer & out); + void writeObjectEnd(WriteBuffer & out, size_t indent = 0); -void writeJSONObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + void writeArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); -void writeJSONObjectEnd(WriteBuffer & out, size_t indent = 0); + void writeCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); -void writeJSONArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + void writeArrayEnd(WriteBuffer & out, size_t indent = 0); -void writeJSONCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + void writeCompactArrayEnd(WriteBuffer & out); -void writeJSONArrayEnd(WriteBuffer & out, size_t indent = 0); + void writeFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name = std::nullopt, + size_t indent = 0); -void writeJSONCompactArrayEnd(WriteBuffer & out); + void writeColumns( + const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent = 0); -void writeJSONFieldFromColumn( - const IColumn & column, - const ISerialization & serialization, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - const std::optional & name = std::nullopt, - size_t indent = 0); + void writeCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool 
yield_strings, + const FormatSettings & settings, + WriteBuffer & out); -void writeJSONColumns(const Columns & columns, - const NamesAndTypes & fields, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out, - size_t indent = 0); + void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out); + + void writeAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out); +} -void writeJSONCompactColumns(const Columns & columns, - const Serializations & serializations, - size_t row_num, - bool yield_strings, - const FormatSettings & settings, - WriteBuffer & out); - -void writeJSONMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out); - -void writeJSONAdditionalInfo( - size_t rows, - size_t rows_before_limit, - bool applied_limit, - const Stopwatch & watch, - const Progress & progress, - bool write_statistics, - WriteBuffer & out); } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 7630d7bb699..d369eedceea 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -221,12 +221,12 @@ void registerInputFormatJSONAsString(FormatFactory & factory) void registerFileSegmentationEngineJSONAsString(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONAsString", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + 
factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerJSONAsStringSchemaReader(FormatFactory & factory) @@ -251,12 +251,12 @@ void registerInputFormatJSONAsObject(FormatFactory & factory) void registerNonTrivialPrefixAndSuffixCheckerJSONAsObject(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsObject", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONAsObject", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerJSONAsObjectSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp index b2c4a8b5283..0e4e74e14c4 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp @@ -96,7 +96,7 @@ size_t JSONColumnsBaseBlockInputFormat::readColumn( do { - readFieldImpl(*in, column, type, serialization, column_name, format_settings, false); + JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, false); } while (!reader->checkColumnEndOrSkipFieldDelimiter()); @@ -185,7 +185,7 @@ void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const Data { auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers); }; 
chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row); } @@ -260,7 +260,7 @@ DataTypePtr JSONColumnsBaseSchemaReader::readColumnAndGetDataType(const String & } readJSONField(field, in); - DataTypePtr field_type = getDataTypeFromJSONField(field); + DataTypePtr field_type = JSONUtils::getDataTypeFromField(field); chooseResulType(column_type, field_type, column_name, rows_read); ++rows_read; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp index bd920bd6367..832f65e4463 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp @@ -48,9 +48,9 @@ void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk) void JSONColumnsBaseBlockOutputFormat::writeColumnEnd(bool is_last) { - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayEnd(*ostr); if (!is_last) - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const ISerialization & serialization) @@ -58,7 +58,7 @@ void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const for (size_t i = 0; i != column.size(); ++i) { if (i != 0) - writeJSONFieldCompactDelimiter(*ostr); + JSONUtils::writeFieldCompactDelimiter(*ostr); serialization.serializeTextJSON(column, i, *ostr, format_settings); } } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp index c018751f1fb..bf8c50b923d 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -20,17 +20,17 @@ JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, c void JSONColumnsBlockOutputFormat::writeChunkStart() { - writeJSONObjectStart(*ostr, 
indent); + JSONUtils::writeObjectStart(*ostr, indent); } void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index) { - writeJSONCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data()); + JSONUtils::writeCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data()); } void JSONColumnsBlockOutputFormat::writeChunkEnd() { - writeJSONObjectEnd(*ostr, indent); + JSONUtils::writeObjectEnd(*ostr, indent); writeChar('\n', *ostr); } diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index 2b41f1d4a4d..1887a10e9f7 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -16,7 +16,7 @@ JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputForm : JSONColumnsBlockOutputFormat(out_, header_, format_settings_, 1) { bool need_validate_utf8 = false; - makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); + JSONUtils::makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); if (need_validate_utf8) { @@ -27,8 +27,8 @@ JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputForm void JSONColumnsWithMetadataBlockOutputFormat::writePrefix() { - writeJSONObjectStart(*ostr); - writeJSONMetadata(fields, format_settings, *ostr); + JSONUtils::writeObjectStart(*ostr); + JSONUtils::writeMetadata(fields, format_settings, *ostr); } void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() @@ -39,13 +39,13 @@ void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "data"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "data"); } void 
JSONColumnsWithMetadataBlockOutputFormat::writeChunkEnd() { - writeJSONObjectEnd(*ostr, indent); + JSONUtils::writeObjectEnd(*ostr, indent); } void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) @@ -55,19 +55,19 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); const auto & columns = chunk.getColumns(); - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "extremes"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "extremes"); writeExtremesElement("min", columns, 0); - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); writeExtremesElement("max", columns, 1); - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONColumnsWithMetadataBlockOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeJSONObjectStart(*ostr, 2, title); - writeJSONColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3); - writeJSONObjectEnd(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 2, title); + JSONUtils::writeColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3); + JSONUtils::writeObjectEnd(*ostr, 2); } void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) @@ -77,10 +77,10 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); const auto & columns = chunk.getColumns(); - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "totals"); - writeJSONColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2); - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "totals"); + 
JSONUtils::writeColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() @@ -89,7 +89,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() if (outside_statistics) statistics = std::move(*outside_statistics); - writeJSONAdditionalInfo( + JSONUtils::writeAdditionalInfo( rows, statistics.rows_before_limit, statistics.applied_limit, @@ -98,7 +98,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() format_settings.write_statistics, *ostr); - writeJSONObjectEnd(*ostr); + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); ostr->next(); } diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp index f748f619cb5..6f240d5e922 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -13,17 +13,17 @@ JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBu void JSONCompactColumnsBlockOutputFormat::writeChunkStart() { - writeJSONArrayStart(*ostr); + JSONUtils::writeArrayStart(*ostr); } void JSONCompactColumnsBlockOutputFormat::writeColumnStart(size_t) { - writeJSONCompactArrayStart(*ostr, 1); + JSONUtils::writeCompactArrayStart(*ostr, 1); } void JSONCompactColumnsBlockOutputFormat::writeChunkEnd() { - writeJSONArrayEnd(*ostr); + JSONUtils::writeArrayEnd(*ostr); writeChar('\n', *ostr); } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 140f3fb41b3..ef59fc8f05a 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -109,7 +109,7 @@ std::vector JSONCompactEachRowFormatReader::readHeaderRow() bool 
JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name) { skipWhitespaceIfAny(*in); - return readFieldImpl(*in, column, type, serialization, column_name, format_settings, yield_strings); + return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings); } bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out) @@ -189,7 +189,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( bool allow_bools_as_numbers = format_settings_.json.read_bools_as_numbers; setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); }); } @@ -209,7 +209,7 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() if (in.eof()) return {}; - return readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings()); + return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings()); } void registerInputFormatJSONCompactEachRow(FormatFactory & factory) @@ -258,7 +258,7 @@ void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory) size_t min_rows = 1 + int(with_names) + int(with_types); factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { - return fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); + return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); }); }; diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index fbb4a8d9116..47b79b71ae2 100644 --- 
a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -21,50 +21,50 @@ JSONCompactRowOutputFormat::JSONCompactRowOutputFormat( void JSONCompactRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); ++field_number; } void JSONCompactRowOutputFormat::writeFieldDelimiter() { - writeJSONFieldCompactDelimiter(*ostr); + JSONUtils::writeFieldCompactDelimiter(*ostr); } void JSONCompactRowOutputFormat::writeRowStartDelimiter() { - writeJSONCompactArrayStart(*ostr, 2); + JSONUtils::writeCompactArrayStart(*ostr, 2); } void JSONCompactRowOutputFormat::writeRowEndDelimiter() { - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayEnd(*ostr); field_number = 0; ++row_count; } void JSONCompactRowOutputFormat::writeBeforeTotals() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONCompactArrayStart(*ostr, 1, "totals"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeCompactArrayStart(*ostr, 1, "totals"); } void JSONCompactRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); } void JSONCompactRowOutputFormat::writeAfterTotals() { - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayEnd(*ostr); } void JSONCompactRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeJSONCompactArrayStart(*ostr, 2, title); - writeJSONCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); - writeJSONCompactArrayEnd(*ostr); + JSONUtils::writeCompactArrayStart(*ostr, 2, 
title); + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + JSONUtils::writeCompactArrayEnd(*ostr); } void registerOutputFormatJSONCompact(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e9d6b516feb..76af5bf02c5 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -140,7 +140,7 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns seen_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; const auto & serialization = serializations[index]; - read_columns[index] = readFieldImpl(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings); + read_columns[index] = JSONUtils::readField(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings); } inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) @@ -313,7 +313,7 @@ JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_str bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers; setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); }); } @@ -350,7 +350,7 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & return {}; } - return readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings); } void registerInputFormatJSONEachRow(FormatFactory & factory) @@ -397,18 +397,18 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) void 
registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("JSONLines", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("NDJSON", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONStringsEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONLines", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("NDJSON", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerJSONEachRowSchemaReader(FormatFactory & factory) diff --git 
a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index d0d50526a0d..fc2d3cb8133 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -18,7 +18,7 @@ JSONRowOutputFormat::JSONRowOutputFormat( { bool need_validate_utf8 = false; fields = header.getNamesAndTypes(); - makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8); + JSONUtils::makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8); if (need_validate_utf8) { @@ -32,34 +32,34 @@ JSONRowOutputFormat::JSONRowOutputFormat( void JSONRowOutputFormat::writePrefix() { - writeJSONObjectStart(*ostr); - writeJSONMetadata(fields, settings, *ostr); - writeJSONFieldDelimiter(*ostr, 2); - writeJSONArrayStart(*ostr, 1, "data"); + JSONUtils::writeObjectStart(*ostr); + JSONUtils::writeMetadata(fields, settings, *ostr); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeArrayStart(*ostr, 1, "data"); } void JSONRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - writeJSONFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3); + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3); ++field_number; } void JSONRowOutputFormat::writeFieldDelimiter() { - writeJSONFieldDelimiter(*ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONRowOutputFormat::writeRowStartDelimiter() { - writeJSONObjectStart(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 2); } void JSONRowOutputFormat::writeRowEndDelimiter() { - writeJSONObjectEnd(*ostr, 2); + JSONUtils::writeObjectEnd(*ostr, 2); field_number = 0; ++row_count; } @@ -67,42 +67,42 @@ void JSONRowOutputFormat::writeRowEndDelimiter() void JSONRowOutputFormat::writeRowBetweenDelimiter() { - writeJSONFieldDelimiter(*ostr); + 
JSONUtils::writeFieldDelimiter(*ostr); } void JSONRowOutputFormat::writeSuffix() { - writeJSONArrayEnd(*ostr, 1); + JSONUtils::writeArrayEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeTotals() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "totals"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "totals"); } void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2); + JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2); } void JSONRowOutputFormat::writeAfterTotals() { - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeExtremes() { - writeJSONFieldDelimiter(*ostr, 2); - writeJSONObjectStart(*ostr, 1, "extremes"); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "extremes"); } void JSONRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeJSONObjectStart(*ostr, 2, title); - writeJSONColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3); - writeJSONObjectEnd(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 2, title); + JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3); + JSONUtils::writeObjectEnd(*ostr, 2); } void JSONRowOutputFormat::writeMinExtreme(const Columns & columns, size_t row_num) @@ -117,7 +117,7 @@ void JSONRowOutputFormat::writeMaxExtreme(const Columns & columns, size_t row_nu void JSONRowOutputFormat::writeAfterExtremes() { - writeJSONObjectEnd(*ostr, 1); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONRowOutputFormat::finalizeImpl() @@ -126,7 +126,7 @@ void JSONRowOutputFormat::finalizeImpl() if (outside_statistics) statistics = std::move(*outside_statistics); - writeJSONAdditionalInfo( + 
JSONUtils::writeAdditionalInfo( row_count, statistics.rows_before_limit, statistics.applied_limit, @@ -135,7 +135,7 @@ void JSONRowOutputFormat::finalizeImpl() settings.write_statistics, *ostr); - writeJSONObjectEnd(*ostr); + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); ostr->next(); } From 46fa56c25cb0444a29c0c23bca16e0aee690d122 Mon Sep 17 00:00:00 2001 From: lingo-xp <36907211+lingo-xp@users.noreply.github.com> Date: Wed, 18 May 2022 18:02:15 +0800 Subject: [PATCH 218/615] [bug-fix] wrong default value in copyThroughBuffers --- src/Disks/DiskLocal.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 8abf0b24782..79bf525a639 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -437,7 +437,7 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr & to fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. } else - copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation. + copyThroughBuffers(from_path, to_disk, to_path, true); /// Base implementation. } void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) @@ -445,7 +445,7 @@ void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ if (isSameDiskType(*this, *to_disk)) fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else - copyThroughBuffers(from_dir, to_disk, to_dir); /// Base implementation. + copyThroughBuffers(from_dir, to_disk, to_dir, false); /// Base implementation. 
} SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const From 2b655ccb75f812bd8925fe8a23571ee8b58e1b33 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 18 May 2022 12:12:41 +0200 Subject: [PATCH 219/615] Speed up test 00157_cache_dictionary --- tests/queries/1_stateful/00157_cache_dictionary.sql | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index a8f9d12ab3a..15bd4cbe6d4 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -1,23 +1,28 @@ -- Tags: no-tsan, no-parallel +DROP TABLE IF EXISTS test.hits_1m; +CREATE TABLE test.hits_1m as test.hits; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; + CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; CREATE DICTIONARY db_dict.cache_hits (WatchID UInt64, UserID UInt64, SearchPhrase String) PRIMARY KEY WatchID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits_1m' PASSWORD '' DB 'test')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 1 QUERY_WAIT_TIMEOUT_MILLISECONDS 60000)); SELECT count() FROM (SELECT WatchID, arrayDistinct(groupArray(dictGetUInt64( 'db_dict.cache_hits', 'UserID', toUInt64(WatchID)))) as arr -FROM test.hits PREWHERE WatchID % 5 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; +FROM test.hits_1m PREWHERE WatchID % 5 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; SELECT count() FROM (SELECT WatchID, arrayDistinct(groupArray(dictGetUInt64( 'db_dict.cache_hits', 'UserID', toUInt64(WatchID)))) as arr -FROM test.hits PREWHERE WatchID % 7 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; +FROM test.hits_1m 
PREWHERE WatchID % 7 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; SELECT count() FROM (SELECT WatchID, arrayDistinct(groupArray(dictGetUInt64( 'db_dict.cache_hits', 'UserID', toUInt64(WatchID)))) as arr -FROM test.hits PREWHERE WatchID % 13 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; +FROM test.hits_1m PREWHERE WatchID % 13 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; DROP DICTIONARY IF EXISTS db_dict.cache_hits; DROP DATABASE IF EXISTS db_dict; +DROP TABLE IF EXISTS test.hits_1m; From 44d4ada542b360f53a96eb1419e41c1d901ad909 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 18 May 2022 18:15:16 +0800 Subject: [PATCH 220/615] fixed code style --- tests/integration/test_hive_query/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 8c37fd81e7c..ab8d684ee35 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -390,6 +390,7 @@ def test_cache_read_bytes(started_cluster): break assert test_passed + def test_cache_dir_use(started_cluster): node = started_cluster.instances["h0_0_0"] result0 = node.exec_in_container( From 45463c27cce05979fd8f7a92220e1b6854342080 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 18 May 2022 12:43:43 +0200 Subject: [PATCH 221/615] Update cmake/ccache.cmake Co-authored-by: Azat Khuzhin --- cmake/ccache.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index acbb00caf56..b20706efa5e 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -28,7 +28,8 @@ string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACH set (CCACHE_MINIMUM_VERSION 3.3) if (CCACHE_VERSION VERSION_LESS_EQUAL ${CCACHE_MINIMUM_VERSION}) - message(FATAL_ERROR "Using ccache: no (found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}), the minimum required version is 
${CCACHE_MINIMUM_VERSION}") + message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}), the minimum required version is ${CCACHE_MINIMUM_VERSION}") + return() endif() message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") From 134821eff8c9a6f5f0fc1cc97315f6529e394a18 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 18 May 2022 12:44:20 +0200 Subject: [PATCH 222/615] Fix build --- src/Formats/JSONUtils.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 8ead3e99e46..1ac58760516 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -121,7 +121,7 @@ namespace JSONUtils } template - DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field) + DataTypePtr getDataTypeFromFieldImpl(const Element & field) { if (field.isNull()) return nullptr; @@ -148,7 +148,7 @@ namespace JSONUtils bool is_tuple = false; for (const auto element : array) { - auto type = getDataTypeFromJSONFieldImpl(element); + auto type = getDataTypeFromFieldImpl(element); if (!type) return nullptr; @@ -171,7 +171,7 @@ namespace JSONUtils bool is_object = false; for (const auto key_value_pair : object) { - auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second); + auto type = getDataTypeFromFieldImpl(key_value_pair.second); if (!type) continue; @@ -215,14 +215,14 @@ namespace JSONUtils #endif } - DataTypePtr getDataTypeFromJSONField(const String & field) + DataTypePtr getDataTypeFromField(const String & field) { auto [parser, element] = getJSONParserAndElement(); bool parsed = parser.parse(field, element); if (!parsed) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); - return getDataTypeFromJSONFieldImpl(element); + return getDataTypeFromFieldImpl(element); } template @@ -239,7 +239,7 @@ namespace JSONUtils DataTypes 
data_types; data_types.reserve(fields.size()); for (const auto & field : fields) - data_types.push_back(getDataTypeFromJSONFieldImpl(field)); + data_types.push_back(getDataTypeFromFieldImpl(field)); /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. /// Should we try to parse data inside strings somehow in this case? From 4b5eda4de9b25e072c0c2efaf6afbb49cdd75434 Mon Sep 17 00:00:00 2001 From: Vxider Date: Wed, 18 May 2022 11:14:08 +0000 Subject: [PATCH 223/615] fix cleanup --- src/Storages/WindowView/StorageWindowView.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b90d5756e72..39c5467c5f8 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -902,8 +902,9 @@ inline void StorageWindowView::cleanup() auto alter_query = getCleanupQuery(); auto cleanup_context = Context::createCopy(getContext()); - cleanup_context->getClientInfo().setInitialQuery(); - cleanup_context->setInternalQuery(true); + cleanup_context->makeQueryContext(); + cleanup_context->setCurrentQueryId(""); + cleanup_context->getClientInfo().is_replicated_database_internal = true; InterpreterAlterQuery interpreter_alter(alter_query, cleanup_context); interpreter_alter.execute(); From 19462bdf9e96fd1271a96e827f683c656d907a56 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 May 2022 14:31:28 +0200 Subject: [PATCH 224/615] Drop filesystem cache before test --- tests/queries/1_stateful/00170_s3_cache.reference | 1 + tests/queries/1_stateful/00170_s3_cache.sql | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/1_stateful/00170_s3_cache.reference b/tests/queries/1_stateful/00170_s3_cache.reference index 96b02595c2e..04d610bc8d2 100644 --- a/tests/queries/1_stateful/00170_s3_cache.reference +++ b/tests/queries/1_stateful/00170_s3_cache.reference @@ -2,6 +2,7 @@ SET 
enable_filesystem_cache_on_write_operations=0; SET max_memory_usage='20G'; +SYSTEM DROP FILESYSTEM CACHE; SELECT count() FROM test.hits_s3; 8873898 SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; diff --git a/tests/queries/1_stateful/00170_s3_cache.sql b/tests/queries/1_stateful/00170_s3_cache.sql index febabb1c9e4..6549bcf5479 100644 --- a/tests/queries/1_stateful/00170_s3_cache.sql +++ b/tests/queries/1_stateful/00170_s3_cache.sql @@ -2,8 +2,11 @@ -- { echo } +SYSTEM DROP FILESYSTEM CACHE; + SET enable_filesystem_cache_on_write_operations=0; SET max_memory_usage='20G'; + SELECT count() FROM test.hits_s3; SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; From 3c85e8b455713433d0ac386125c38e0f35488fec Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 18 May 2022 12:36:13 +0000 Subject: [PATCH 225/615] Fix tests --- src/Storages/ColumnsDescription.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index a7af2433875..dcfcd81f6f7 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -31,6 +31,7 @@ #include #include +#include namespace DB { @@ -543,9 +544,16 @@ Names ColumnsDescription::getNamesOfPhysical() const std::optional ColumnsDescription::tryGetColumn(const GetColumnsOptions & options, const String & column_name) const { - auto column_description = tryGetColumnDescription(options, column_name); - if (column_description) - return NameAndTypePair{column_description->name, column_description->type}; + auto it = columns.get<1>().find(column_name); + if (it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & options.kind)) + return NameAndTypePair(it->name, it->type); + + if (options.with_subcolumns) + { + auto jt = subcolumns.get<0>().find(column_name); + if (jt != subcolumns.get<0>().end()) + return *jt; + 
} return {}; } From 8ca58cd5a56c3ffcd8222d1f273ff947788ac716 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 18 May 2022 15:23:58 +0200 Subject: [PATCH 226/615] Provide default value for mark_cache_size I played around with my local config.xml file. The minimal working example is this: 5368709120 localhost 9000 users.xml true Not specifying mark_cache_size made the server not start up: 2022.05.18 12:15:06.549078 [ 8728320 ] {} Application: Not found: mark_cache_size Looking at ClickHouse's ca. 100 server configuration options + sub-options, it seems that mark_cache_size is NOT special enough to require explicit configuration but instead that the behavior was unintended because no default value was provided. --- programs/local/LocalServer.cpp | 6 ++---- programs/server/Server.cpp | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index f3fa7ff2bfa..8faa4e9f9c2 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -544,8 +544,7 @@ void LocalServer::processConfig() if (uncompressed_cache_size) global_context->setUncompressedCache(uncompressed_cache_size); - /// Size of cache for marks (index of MergeTree family of tables). It is necessary. - /// Specify default value for mark_cache_size explicitly! + /// Size of cache for marks (index of MergeTree family of tables). size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); if (mark_cache_size) global_context->setMarkCache(mark_cache_size); @@ -555,8 +554,7 @@ void LocalServer::processConfig() if (index_uncompressed_cache_size) global_context->setIndexUncompressedCache(index_uncompressed_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). It is necessary. - /// Specify default value for index_mark_cache_size explicitly! + /// Size of cache for index marks (index of MergeTree skip indices). 
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); if (index_mark_cache_size) global_context->setIndexMarkCache(index_mark_cache_size); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2b1a0809143..4d1bfd084f3 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1344,8 +1344,8 @@ int Server::main(const std::vector & /*args*/) settings.async_insert_max_data_size, AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms})); - /// Size of cache for marks (index of MergeTree family of tables). It is mandatory. - size_t mark_cache_size = config().getUInt64("mark_cache_size"); + /// Size of cache for marks (index of MergeTree family of tables). + size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -1361,8 +1361,7 @@ int Server::main(const std::vector & /*args*/) if (index_uncompressed_cache_size) global_context->setIndexUncompressedCache(index_uncompressed_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). It is necessary. - /// Specify default value for index_mark_cache_size explicitly! + /// Size of cache for index marks (index of MergeTree skip indices). 
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); if (index_mark_cache_size) global_context->setIndexMarkCache(index_mark_cache_size); From 04f1b202294cf6d24bec2abc51bff1f2f2e24b54 Mon Sep 17 00:00:00 2001 From: Vxider Date: Wed, 18 May 2022 14:34:40 +0000 Subject: [PATCH 227/615] init inner window names --- src/Storages/WindowView/StorageWindowView.cpp | 25 ++++++++++++++ ...065_window_view_event_hop_watch_bounded.py | 34 ++++++++++--------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 60259e46733..3352699aa59 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1506,6 +1506,31 @@ void StorageWindowView::writeIntoWindowView( void StorageWindowView::startup() { + if (is_time_column_func_now) + inner_window_id_column_name = window_id_name; + else + { + Aliases aliases; + QueryAliasesVisitor(aliases).visit(mergeable_query); + auto inner_query_normalized = mergeable_query->clone(); + QueryNormalizer::Data normalizer_data(aliases, {}, false, getContext()->getSettingsRef(), false); + QueryNormalizer(normalizer_data).visit(inner_query_normalized); + auto inner_select_query = std::static_pointer_cast(inner_query_normalized); + auto t_sample_block + = InterpreterSelectQuery(inner_select_query, getContext(), SelectQueryOptions(QueryProcessingStage::WithMergeableState)) + .getSampleBlock(); + for (const auto & column : t_sample_block.getColumnsWithTypeAndName()) + { + if (startsWith(column.name, "windowID")) + { + inner_window_id_column_name = column.name; + break; + } + } + } + + inner_window_column_name = std::regex_replace(inner_window_id_column_name, std::regex("windowID"), is_tumble ? 
"tumble" : "hop"); + // Start the working thread clean_cache_task->activateAndSchedule(); fire_task->activateAndSchedule(); diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 92d2b56ed34..fd9499b12b6 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import os import sys -import signal CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) @@ -25,31 +24,34 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") + client1.send("CREATE DATABASE IF NOT EXISTS 01065_window_view_event_hop_watch_bounded") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.wv") + client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.mt") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS `.inner.wv`") + client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.wv") client1.expect(prompt) client1.send( - "CREATE TABLE test.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE 01065_window_view_event_hop_watch_bounded.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW test.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM test.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01065_window_view_event_hop_watch_bounded.wv ENGINE Memory WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01065_window_view_event_hop_watch_bounded.mt GROUP BY hop(timestamp, INTERVAL 
'2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) - client1.send("WATCH test.wv") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:00');") - client2.expect(prompt) - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:05');") - client2.expect(prompt) + client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") + client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');") + client2.expect("Ok.") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');") + client2.expect("Ok.") client1.expect("1*" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:06');") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:10');") - client2.expect(prompt) + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');") + client2.expect("Ok.") + client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');") + client2.expect("Ok.") client1.expect("1*" + end_of_block) client1.expect("2*" + end_of_block) @@ -59,7 +61,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE test.wv") + client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.wv") client1.expect(prompt) - client1.send("DROP TABLE test.mt") + client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.mt") client1.expect(prompt) From 6098cf7cfe2223efbacad9d197b3e346cbdbee04 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 18 May 2022 17:46:23 +0300 Subject: [PATCH 228/615] Update src/Disks/DiskLocal.cpp --- src/Disks/DiskLocal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 79bf525a639..cbc04084795 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -437,7 +437,7 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr & to fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. } else - copyThroughBuffers(from_path, to_disk, to_path, true); /// Base implementation. + copyThroughBuffers(from_path, to_disk, to_path, /* copy_root_dir */ true); /// Base implementation. } void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) From 7ab0aa7882aeb1f8e13009c3311fb3cab0384a4e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 18 May 2022 17:46:28 +0300 Subject: [PATCH 229/615] Update src/Disks/DiskLocal.cpp --- src/Disks/DiskLocal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index cbc04084795..ec122b6555b 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -445,7 +445,7 @@ void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ if (isSameDiskType(*this, *to_disk)) fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else - copyThroughBuffers(from_dir, to_disk, to_dir, false); /// Base implementation. + copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); /// Base implementation. 
} SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const From a0369fb9a6be015e0ce4fee405419d8531cdd54d Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 18 May 2022 14:51:21 +0000 Subject: [PATCH 230/615] Allow to use String type instead of Binary in Arrow/Parquet/ORC formats --- src/Core/Settings.h | 4 + src/Formats/FormatFactory.cpp | 7 +- src/Formats/FormatSettings.h | 3 + .../Formats/Impl/ArrowBlockOutputFormat.cpp | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 2 + .../Formats/Impl/CHColumnToArrowColumn.cpp | 75 +++++++++++-------- .../Formats/Impl/CHColumnToArrowColumn.h | 6 +- .../Formats/Impl/ORCBlockOutputFormat.cpp | 2 + .../Formats/Impl/ParquetBlockOutputFormat.cpp | 2 +- ...c_arrow_parquet_string_as_string.reference | 3 + ...304_orc_arrow_parquet_string_as_string.sql | 6 ++ 11 files changed, 77 insertions(+), 35 deletions(-) create mode 100644 tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference create mode 100644 tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 29427c673ac..5f17b088813 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -698,6 +698,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ + M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -735,6 +736,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if possible. Possible values: 0 - no rewrite, 1 - apply if possible, 2 - force rewrite all cross joins", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ + M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \ + \ + M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \ \ M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 96b52cd2423..4c1b23a75ab 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -99,6 +99,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -132,17 +133,19 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; + format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; - format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; - format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = 
settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; + format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4f77fe099e1..e6f0a7d229e 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -81,6 +81,7 @@ struct FormatSettings bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; + bool output_string_as_string = false; } arrow; struct @@ -148,6 +149,7 @@ struct FormatSettings bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; std::unordered_set skip_row_groups = {}; + bool output_string_as_string = false; } parquet; struct Pretty @@ -234,6 +236,7 @@ struct FormatSettings bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; std::unordered_set skip_stripes = {}; + bool output_string_as_string = false; } orc; /// For capnProto format we should determine how to diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index 60408f13ff0..83eaefa8cf7 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -34,7 +34,7 @@ void 
ArrowBlockOutputFormat::consume(Chunk chunk) { const Block & header = getPort(PortKind::Main).getHeader(); ch_column_to_arrow_column - = std::make_unique(header, "Arrow", format_settings.arrow.low_cardinality_as_dictionary); + = std::make_unique(header, "Arrow", format_settings.arrow.low_cardinality_as_dictionary, format_settings.arrow.output_string_as_string); } ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, columns_num); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index c792d828e44..543d09a48d3 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -36,6 +36,8 @@ #include #include +#include + /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. #define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 3f6a36e8e8c..bd5a6368291 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -168,6 +168,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values); template @@ -180,6 +181,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const auto * column_array = assert_cast(column.get()); @@ -196,7 +198,7 @@ namespace DB /// Start new array. 
components_status = builder.Append(); checkStatus(components_status, nested_column->getName(), format_name); - fillArrowArray(column_name, nested_column, nested_type, null_bytemap, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], dictionary_values); + fillArrowArray(column_name, nested_column, nested_type, null_bytemap, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, dictionary_values); } } @@ -209,6 +211,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const auto * column_tuple = assert_cast(column.get()); @@ -219,7 +222,7 @@ namespace DB for (size_t i = 0; i != column_tuple->tupleSize(); ++i) { ColumnPtr nested_column = column_tuple->getColumnPtr(i); - fillArrowArray(column_name + "." + std::to_string(i), nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, dictionary_values); + fillArrowArray(column_name + "." 
+ std::to_string(i), nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, output_string_as_string, dictionary_values); } for (size_t i = start; i != end; ++i) @@ -267,6 +270,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const auto * column_lc = assert_cast(column.get()); @@ -284,7 +288,7 @@ namespace DB auto dict_column = column_lc->getDictionary().getNestedColumn(); const auto & dict_type = assert_cast(column_type.get())->getDictionaryType(); - fillArrowArray(column_name, dict_column, dict_type, nullptr, values_builder.get(), format_name, 0, dict_column->size(), dictionary_values); + fillArrowArray(column_name, dict_column, dict_type, nullptr, values_builder.get(), format_name, 0, dict_column->size(), output_string_as_string, dictionary_values); status = values_builder->Finish(&dict_values); checkStatus(status, column->getName(), format_name); } @@ -321,6 +325,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { auto value_type = assert_cast(array_builder->type().get())->value_type(); @@ -328,7 +333,7 @@ namespace DB #define DISPATCH(ARROW_TYPE_ID, ARROW_TYPE) \ if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \ { \ - fillArrowArrayWithLowCardinalityColumnDataImpl(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \ + fillArrowArrayWithLowCardinalityColumnDataImpl(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); \ return; \ } @@ -338,7 +343,7 @@ namespace DB throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot fill arrow array with {} data.", column_type->getName()); } - template + template static void fillArrowArrayWithStringColumnData( ColumnPtr write_column, const PaddedPODArray * null_bytemap, @@ 
-348,7 +353,7 @@ namespace DB size_t end) { const auto & internal_column = assert_cast(*write_column); - arrow::BinaryBuilder & builder = assert_cast(*array_builder); + ArrowBuilder & builder = assert_cast(*array_builder); arrow::Status status; for (size_t string_i = start; string_i < end; ++string_i) @@ -442,6 +447,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const String column_type_name = column_type->getFamilyName(); @@ -453,15 +459,21 @@ namespace DB DataTypePtr nested_type = assert_cast(column_type.get())->getNestedType(); const ColumnPtr & null_column = column_nullable->getNullMapColumnPtr(); const PaddedPODArray & bytemap = assert_cast &>(*null_column).getData(); - fillArrowArray(column_name, nested_column, nested_type, &bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArray(column_name, nested_column, nested_type, &bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isString(column_type)) { - fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + if (output_string_as_string) + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + else + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); } else if (isFixedString(column_type)) { - fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + if (output_string_as_string) + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + else + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); } else if (isDate(column_type)) { @@ -477,21 +489,21 @@ namespace DB } else if (isArray(column_type)) { - fillArrowArrayWithArrayColumnData(column_name, column, column_type, null_bytemap, 
array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithArrayColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isTuple(column_type)) { - fillArrowArrayWithTupleColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithTupleColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (column_type->getTypeId() == TypeIndex::LowCardinality) { - fillArrowArrayWithLowCardinalityColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithLowCardinalityColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isMap(column_type)) { ColumnPtr column_array = assert_cast(column.get())->getNestedColumnPtr(); DataTypePtr array_type = assert_cast(column_type.get())->getNestedType(); - fillArrowArrayWithArrayColumnData(column_name, column_array, array_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithArrayColumnData(column_name, column_array, array_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isDecimal(column_type)) { @@ -603,13 +615,13 @@ namespace DB } static std::shared_ptr getArrowType( - DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable) + DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool output_string_as_string, bool * out_is_column_nullable) { if (column_type->isNullable()) { DataTypePtr nested_type = 
assert_cast(column_type.get())->getNestedType(); ColumnPtr nested_column = assert_cast(column.get())->getNestedColumnPtr(); - auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable); + auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, output_string_as_string, out_is_column_nullable); *out_is_column_nullable = true; return arrow_type; } @@ -643,7 +655,7 @@ namespace DB { auto nested_type = assert_cast(column_type.get())->getNestedType(); auto nested_column = assert_cast(column.get())->getDataPtr(); - auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable); + auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, output_string_as_string, out_is_column_nullable); return arrow::list(nested_arrow_type); } @@ -655,7 +667,7 @@ namespace DB for (size_t i = 0; i != nested_types.size(); ++i) { String name = column_name + "." 
+ std::to_string(i); - auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable); + auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, output_string_as_string, out_is_column_nullable); nested_fields.push_back(std::make_shared(name, nested_arrow_type, *out_is_column_nullable)); } return arrow::struct_(nested_fields); @@ -669,7 +681,7 @@ namespace DB const auto & indexes_column = lc_column->getIndexesPtr(); return arrow::dictionary( getArrowTypeForLowCardinalityIndexes(indexes_column), - getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable)); + getArrowType(nested_type, nested_column, column_name, format_name, output_string_as_string, out_is_column_nullable)); } if (isMap(column_type)) @@ -680,10 +692,19 @@ namespace DB const auto & columns = assert_cast(column.get())->getNestedData().getColumns(); return arrow::map( - getArrowType(key_type, columns[0], column_name, format_name, out_is_column_nullable), - getArrowType(val_type, columns[1], column_name, format_name, out_is_column_nullable)); + getArrowType(key_type, columns[0], column_name, format_name, output_string_as_string, out_is_column_nullable), + getArrowType(val_type, columns[1], column_name, format_name, output_string_as_string, out_is_column_nullable)); } + if (isDateTime64(column_type)) + { + const auto * datetime64_type = assert_cast(column_type.get()); + return arrow::timestamp(getArrowTimeUnit(datetime64_type), datetime64_type->getTimeZone().getTimeZone()); + } + + if (isStringOrFixedString(column_type) && output_string_as_string) + return arrow::utf8(); + const std::string type_name = column_type->getFamilyName(); if (const auto * arrow_type_it = std::find_if( internal_type_to_arrow_type.begin(), @@ -694,19 +715,13 @@ namespace DB return arrow_type_it->second; } - if (isDateTime64(column_type)) - { - const auto * datetime64_type = 
assert_cast(column_type.get()); - return arrow::timestamp(getArrowTimeUnit(datetime64_type), datetime64_type->getTimeZone().getTimeZone()); - } - throw Exception(ErrorCodes::UNKNOWN_TYPE, "The type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getName(), column_name, format_name); } - CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_) - : format_name(format_name_), low_cardinality_as_dictionary(low_cardinality_as_dictionary_) + CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_, bool output_string_as_string_) + : format_name(format_name_), low_cardinality_as_dictionary(low_cardinality_as_dictionary_), output_string_as_string(output_string_as_string_) { arrow_fields.reserve(header.columns()); header_columns.reserve(header.columns()); @@ -741,7 +756,7 @@ namespace DB if (!is_arrow_fields_initialized) { bool is_column_nullable = false; - auto arrow_type = getArrowType(header_column.type, column, header_column.name, format_name, &is_column_nullable); + auto arrow_type = getArrowType(header_column.type, column, header_column.name, format_name, output_string_as_string, &is_column_nullable); arrow_fields.emplace_back(std::make_shared(header_column.name, arrow_type, is_column_nullable)); } @@ -751,7 +766,7 @@ namespace DB checkStatus(status, column->getName(), format_name); fillArrowArray( - header_column.name, column, header_column.type, nullptr, array_builder.get(), format_name, 0, column->size(), dictionary_values); + header_column.name, column, header_column.type, nullptr, array_builder.get(), format_name, 0, column->size(), output_string_as_string, dictionary_values); std::shared_ptr arrow_array; status = array_builder->Finish(&arrow_array); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h 
b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h index 50de8045d5f..2896fb3642f 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h @@ -14,7 +14,7 @@ namespace DB class CHColumnToArrowColumn { public: - CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_); + CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_, bool output_string_as_string_); void chChunkToArrowTable(std::shared_ptr & res, const Chunk & chunk, size_t columns_num); @@ -32,6 +32,10 @@ private: /// because LowCardinality column from header always has indexes type UInt8, so, we should get /// proper indexes type from first chunk of data. bool is_arrow_fields_initialized = false; + + /// Output columns with String data type as Arrow::String type. + /// By default Arrow::Binary is used. + bool output_string_as_string = false; }; } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 106b71a9df5..aaa3e8fe976 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -100,6 +100,8 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { + if (format_settings.orc.output_string_as_string) + return orc::createPrimitiveType(orc::TypeKind::STRING); return orc::createPrimitiveType(orc::TypeKind::BINARY); } case TypeIndex::Nullable: diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 68e2ae1c6eb..c8e94311af5 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -29,7 +29,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) if 
(!ch_column_to_arrow_column) { const Block & header = getPort(PortKind::Main).getHeader(); - ch_column_to_arrow_column = std::make_unique(header, "Parquet", false); + ch_column_to_arrow_column = std::make_unique(header, "Parquet", false, format_settings.parquet.output_string_as_string); } ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, columns_num); diff --git a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference new file mode 100644 index 00000000000..f0ab418f0ce --- /dev/null +++ b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference @@ -0,0 +1,3 @@ +s Nullable(String) +s Nullable(String) +s Nullable(String) diff --git a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql new file mode 100644 index 00000000000..37ebc1c748e --- /dev/null +++ b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql @@ -0,0 +1,6 @@ +insert into function file(data_02304.parquet) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_parquet_string_as_string=1; +desc file(data_02304.parquet); +insert into function file(data_02304.orc) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_orc_string_as_string=1; +desc file(data_02304.orc); +insert into function file(data_02304.arrow) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_arrow_string_as_string=1; +desc file(data_02304.arrow); From d10b64468ea0362f6a37486743cf6a941ba29ecc Mon Sep 17 00:00:00 2001 From: Vxider Date: Wed, 18 May 2022 15:00:08 +0000 Subject: [PATCH 231/615] update code style --- ...065_window_view_event_hop_watch_bounded.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git 
a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index fd9499b12b6..9604632fca3 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -24,7 +24,9 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("CREATE DATABASE IF NOT EXISTS 01065_window_view_event_hop_watch_bounded") + client1.send( + "CREATE DATABASE IF NOT EXISTS 01065_window_view_event_hop_watch_bounded" + ) client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.mt") client1.expect(prompt) @@ -43,14 +45,22 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") client1.expect("Query id" + end_of_block) client1.expect("Progress: 0.00 rows.*\)") - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');" + ) client2.expect("Ok.") - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');" + ) client2.expect("Ok.") client1.expect("1*" + end_of_block) - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');" + ) client2.expect("Ok.") - client2.send("INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');") + client2.send( + "INSERT INTO 
01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');" + ) client2.expect("Ok.") client1.expect("1*" + end_of_block) client1.expect("2*" + end_of_block) From 41f7424ba68d835eb66e1a5224384cff799cce4a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 May 2022 17:13:17 +0200 Subject: [PATCH 232/615] Improve deletion logic --- src/Disks/S3ObjectStorage.cpp | 97 +++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 33 deletions(-) diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/S3ObjectStorage.cpp index f09bbc28474..9c33553c6b0 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/S3ObjectStorage.cpp @@ -60,6 +60,19 @@ void throwIfError(const Aws::Utils::Outcome & response) } } +template +void logIfError(const Aws::Utils::Outcome & response, std::function && msg) +{ + try + { + throwIfError(response); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, msg()); + } +} + } Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const @@ -212,26 +225,34 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) return; auto client_ptr = client.get(); - std::vector keys; - keys.reserve(paths.size()); + auto settings_ptr = s3_settings.get(); - for (const auto & path : paths) + size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; + size_t current_position = 0; + + while (current_position < paths.size()) { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(path); - keys.push_back(obj); + std::vector current_chunk; + String keys; + for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(paths[current_position]); + current_chunk.push_back(obj); + + if (!keys.empty()) + keys += ", "; + keys += paths[current_position]; + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(current_chunk); + 
Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); } - - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(keys); - - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = client_ptr->DeleteObjects(request); - - throwIfError(outcome); - } void S3ObjectStorage::removeObjectIfExists(const std::string & path) @@ -255,25 +276,35 @@ void S3ObjectStorage::removeObjectsIfExist(const std::vector & path return; auto client_ptr = client.get(); + auto settings_ptr = s3_settings.get(); - std::vector keys; - keys.reserve(paths.size()); - for (const auto & path : paths) + + size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; + size_t current_position = 0; + + while (current_position < paths.size()) { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(path); - keys.push_back(obj); + std::vector current_chunk; + String keys; + for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(paths[current_position]); + current_chunk.push_back(obj); + + if (!keys.empty()) + keys += ", "; + keys += paths[current_position]; + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(current_chunk); + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(bucket); + request.SetDelete(delkeys); + auto outcome = client_ptr->DeleteObjects(request); + logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); } - - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(keys); - - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = client_ptr->DeleteObjects(request); - - throwIfError(outcome); } ObjectMetadata 
S3ObjectStorage::getObjectMetadata(const std::string & path) const From de14e35e4731b3ace7dfec29ce5db4b3d1167b15 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 18 May 2022 17:16:08 +0200 Subject: [PATCH 233/615] Fix fast test --- .../queries/0_stateless/02302_defaults_in_columnar_formats.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql index 46dedf12253..0262199b713 100644 --- a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql +++ b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + insert into function file(data_02302.parquet) select 1 as x settings engine_file_truncate_on_insert=1; select * from file(data_02302.parquet, auto, 'x UInt8, y default 42, z default x + y') settings input_format_parquet_allow_missing_columns=1; insert into function file(data_02302.orc) select 1 as x settings engine_file_truncate_on_insert=1; From ff59ccd3c786f2f95ba1adf9821313085142bc65 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 18 May 2022 17:43:28 +0200 Subject: [PATCH 234/615] Update test --- .../0_stateless/02304_orc_arrow_parquet_string_as_string.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql index 37ebc1c748e..e354f303c7f 100644 --- a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql +++ b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + insert into function file(data_02304.parquet) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_parquet_string_as_string=1; desc file(data_02304.parquet); insert 
into function file(data_02304.orc) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_orc_string_as_string=1; From d81616ff657b85ce37e36b8115143cd06cea2112 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 18 May 2022 17:44:39 +0200 Subject: [PATCH 235/615] Remove unnecessary include --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 543d09a48d3..7c5dd2a03ea 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -36,7 +36,6 @@ #include #include -#include /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. #define FOR_ARROW_NUMERIC_TYPES(M) \ From cd6a29897ec4018d61e21c65624b2447991a7fcb Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 18 May 2022 17:56:36 +0000 Subject: [PATCH 236/615] Apply input_format_max_rows_to_read_for_schema_inference for all files in globs in total --- src/Core/Settings.h | 2 +- src/Formats/ReadSchemaUtils.cpp | 20 +++++++++- src/Processors/Formats/ISchemaReader.cpp | 37 ++++++++++++------- src/Processors/Formats/ISchemaReader.h | 13 ++++++- .../Impl/JSONEachRowRowInputFormat.cpp | 2 +- .../Formats/Impl/TSKVRowInputFormat.cpp | 5 +-- ...2305_schema_inference_with_globs.reference | 6 +++ .../02305_schema_inference_with_globs.sh | 17 +++++++++ 8 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/02305_schema_inference_with_globs.reference create mode 100755 tests/queries/0_stateless/02305_schema_inference_with_globs.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 29427c673ac..6878242b56f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -660,7 +660,7 @@ static constexpr UInt64 operator""_GiB(unsigned 
long long value) M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \ M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ - M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ + M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \ M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \ M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \ diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 26944f85014..035546031d8 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -73,11 +73,15 @@ ColumnsDescription readSchemaFromFormat( { std::string exception_messages; SchemaReaderPtr schema_reader; + size_t max_rows_to_read = format_settings ? 
format_settings->max_rows_to_read_for_schema_inference : context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference; + size_t iterations = 0; while ((buf = read_buffer_iterator())) { + ++iterations; + if (buf->eof()) { - auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is emptyg", format_name); + auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name); if (!retry) throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, exception_message); @@ -89,12 +93,26 @@ ColumnsDescription readSchemaFromFormat( try { schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings); + schema_reader->setMaxRowsToRead(max_rows_to_read); names_and_types = schema_reader->readSchema(); break; } catch (...) { auto exception_message = getCurrentExceptionMessage(false); + size_t rows_read = schema_reader->getNumRowsRead(); + assert(rows_read <= max_rows_to_read); + max_rows_to_read -= schema_reader->getNumRowsRead(); + if (rows_read != 0 && max_rows_to_read == 0) + { + exception_message += "\nTo increase the maximum number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference"; + if (iterations > 1) + { + exception_messages += "\n" + exception_message; + break; + } + retry = false; + } if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode())) throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. 
Error: {}", format_name, exception_message); diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index f23f33c482d..11b5afc78f1 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -12,6 +12,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int INCORRECT_DATA; extern const int EMPTY_DATA_PASSED; + extern const int BAD_ARGUMENTS; } static void chooseResultType( @@ -48,16 +49,14 @@ static void chooseResultType( } } -static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t max_rows_to_read) +static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read) { if (!type) { if (!default_type) throw Exception( ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, - "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. 
To increase the maximum " - "number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference", - max_rows_to_read); + "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls", rows_read); type = default_type; } @@ -65,7 +64,7 @@ static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, c } IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings) - : ISchemaReader(in_), max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference) + : ISchemaReader(in_) { if (!format_settings.column_names_for_schema_inference.empty()) { @@ -94,8 +93,14 @@ IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & form NamesAndTypesList IRowSchemaReader::readSchema() { + if (max_rows_to_read == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot read rows to determine the schema, the maximum number of rows to read is set to 0. " + "Most likely setting input_format_max_rows_to_read_for_schema_inference is set to 0"); + DataTypes data_types = readRowAndGetDataTypes(); - for (size_t row = 1; row < max_rows_to_read; ++row) + for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read) { DataTypes new_data_types = readRowAndGetDataTypes(); if (new_data_types.empty()) @@ -111,7 +116,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (!new_data_types[i]) continue; - chooseResultType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), row); + chooseResultType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), rows_read); } } @@ -136,7 +141,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() for (size_t i = 0; i != data_types.size(); ++i) { /// Check that we could determine the type of this column. 
- checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read); + checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), rows_read); } return result; @@ -151,13 +156,19 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const return nullptr; } -IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_) - : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_) +IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_) + : ISchemaReader(in_), default_type(default_type_) { } NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { + if (max_rows_to_read == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot read rows to determine the schema, the maximum number of rows to read is set to 0. " + "Most likely setting input_format_max_rows_to_read_for_schema_inference is set to 0"); + bool eof = false; auto names_and_types = readRowAndGetNamesAndDataTypes(eof); std::unordered_map names_to_types; @@ -170,7 +181,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() names_order.push_back(name); } - for (size_t row = 1; row < max_rows_to_read; ++row) + for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read) { auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof); if (eof) @@ -189,7 +200,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() } auto & type = it->second; - chooseResultType(type, new_type, common_type_checker, default_type, name, row); + chooseResultType(type, new_type, common_type_checker, default_type, name, rows_read); } } @@ -202,7 +213,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { auto & type = names_to_types[name]; /// Check that we could determine the type of this column. 
- checkTypeAndAppend(result, type, name, default_type, max_rows_to_read); + checkTypeAndAppend(result, type, name, default_type, rows_read); } return result; diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index a8eff762856..f748680ed24 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -26,6 +26,9 @@ public: virtual bool needContext() const { return false; } virtual void setContext(ContextPtr &) {} + virtual void setMaxRowsToRead(size_t) {} + virtual size_t getNumRowsRead() const { return 0; } + virtual ~ISchemaReader() = default; protected: @@ -61,10 +64,14 @@ protected: void setColumnNames(const std::vector & names) { column_names = names; } + void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; } + size_t getNumRowsRead() const override { return rows_read; } + private: DataTypePtr getDefaultType(size_t column) const; size_t max_rows_to_read; + size_t rows_read = 0; DataTypePtr default_type; DataTypes default_types; CommonDataTypeChecker common_type_checker; @@ -79,7 +86,7 @@ private: class IRowWithNamesSchemaReader : public ISchemaReader { public: - IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr); + IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_ = nullptr); NamesAndTypesList readSchema() override; bool hasStrictOrderOfColumns() const override { return false; } @@ -92,8 +99,12 @@ protected: /// Set eof = true if can't read more data. 
virtual NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) = 0; + void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; } + size_t getNumRowsRead() const override { return rows_read; } + private: size_t max_rows_to_read; + size_t rows_read = 0; DataTypePtr default_type; CommonDataTypeChecker common_type_checker; }; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 4fb7a40ebfc..30dca893afa 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -307,7 +307,7 @@ void JSONEachRowRowInputFormat::readSuffix() } JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings) - : IRowWithNamesSchemaReader(in_, format_settings.max_rows_to_read_for_schema_inference) + : IRowWithNamesSchemaReader(in_) , json_strings(json_strings_) { bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 26c7d1aced5..5c48062ace8 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -214,10 +214,7 @@ void TSKVRowInputFormat::resetParser() } TSKVSchemaReader::TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : IRowWithNamesSchemaReader( - in_, - format_settings_.max_rows_to_read_for_schema_inference, - getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped)) + : IRowWithNamesSchemaReader(in_, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped)) , format_settings(format_settings_) { } diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.reference b/tests/queries/0_stateless/02305_schema_inference_with_globs.reference new file mode 100644 
index 00000000000..3a92ee54da4 --- /dev/null +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.reference @@ -0,0 +1,6 @@ +2 +4 +6 +8 +x Nullable(String) +x Nullable(String) diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh new file mode 100755 index 00000000000..346931e7204 --- /dev/null +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into function file(data1.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(data2.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(data3.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(data4.jsonl) select number % 2 ? 
number : NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" + +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=8" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=16" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=24" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=31" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=32" +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=100" From e453132db813e404c10e47aa8eb60ae54f2e8754 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 18 May 2022 15:25:59 -0400 Subject: [PATCH 237/615] remove hashid define guard --- contrib/hashidsxx-cmake/CMakeLists.txt | 7 ------- src/Common/config.h.in | 1 - src/Core/config_core.h.in | 1 - src/Functions/FunctionHashID.cpp | 6 ------ src/Functions/FunctionHashID.h | 22 +++++++++------------- src/Functions/registerFunctions.cpp | 3 --- src/configure_config.cmake | 3 --- tests/queries/0_stateless/02293_hashid.sql | 2 -- 8 files changed, 9 insertions(+), 36 deletions(-) diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt index f916355251d..17f3888bd94 100644 --- a/contrib/hashidsxx-cmake/CMakeLists.txt +++ b/contrib/hashidsxx-cmake/CMakeLists.txt @@ -1,10 +1,3 @@ -option(ENABLE_HASHIDSXX "Enable hashidsxx" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_HASHIDSXX) - message(STATUS "Not using hashidsxx") - return() -endif() - set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") set 
(SRCS diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 0715cc2335f..d8d308c59bd 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -26,4 +26,3 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_REPLXX #cmakedefine01 USE_JEMALLOC -#cmakedefine01 USE_HASHIDSXX diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 6d296c2dbff..3fc2503aaa5 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -21,4 +21,3 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_REPLXX #cmakedefine01 USE_JEMALLOC -#cmakedefine01 USE_HASHIDSXX diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp index 8ef81c5cfd1..bd875a9d4ff 100644 --- a/src/Functions/FunctionHashID.cpp +++ b/src/Functions/FunctionHashID.cpp @@ -1,10 +1,6 @@ #include "FunctionHashID.h" - - #include -#if USE_HASHIDSXX - namespace DB { @@ -14,5 +10,3 @@ void registerFunctionHashID(FunctionFactory & factory) } } - -#endif diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 8cc789d3680..45def95b8de 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -2,19 +2,17 @@ #include -#if USE_HASHIDSXX +#include -# include +#include +#include +#include +#include +#include +#include -# include -# include -# include -# include -# include -# include - -# include -# include +#include +#include namespace DB { @@ -159,5 +157,3 @@ public: }; } - -#endif diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index d70f019b3e1..aee5f22cf55 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -132,10 +132,7 @@ void registerFunctions() #endif registerFunctionTid(factory); registerFunctionLogTrace(factory); - -#if USE_HASHIDSXX registerFunctionHashID(factory); -#endif } } diff --git a/src/configure_config.cmake b/src/configure_config.cmake index aa1419c7792..519307ba28a 100644 --- a/src/configure_config.cmake +++ 
b/src/configure_config.cmake @@ -100,6 +100,3 @@ endif() if (TARGET ch_contrib::jemalloc) set(USE_JEMALLOC 1) endif() -if (TARGET ch_contrib::hashidsxx) - set(USE_HASHIDSXX 1) -endif() diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql index 773afc1e0de..43dd77bb196 100644 --- a/tests/queries/0_stateless/02293_hashid.sql +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -1,5 +1,3 @@ --- Tags: no-fasttest - select number, hashid(number) from system.numbers limit 5; select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; select hashid(1234567890123456, 's3cr3t'); From 2995b69f4a954910499248d217d387c2c97eaa5c Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 18 May 2022 19:31:14 +0000 Subject: [PATCH 238/615] Swap order of converting_join_columns and before_join steps --- src/Interpreters/ExpressionAnalyzer.cpp | 11 ++++++----- src/Interpreters/ExpressionAnalyzer.h | 4 ++-- src/Interpreters/InterpreterSelectQuery.cpp | 18 +++++++++--------- .../02302_clash_const_aggegate_join.reference | 1 + .../02302_clash_const_aggegate_join.sql | 15 +++++++++++++++ 5 files changed, 33 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/02302_clash_const_aggegate_join.reference create mode 100644 tests/queries/0_stateless/02302_clash_const_aggegate_join.sql diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 0b1154f6fd1..14bb8f8e8c2 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -976,16 +976,16 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi return array_join; } -bool SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types) +void SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types) { ExpressionActionsChain::Step & step = 
chain.lastStep(columns_after_array_join); getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()); - return true; } -JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns) +JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns, ActionsDAGPtr & before_join) { + const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); JoinPtr table_join = makeTableJoin(*syntax->ast_join, left_sample_columns, converting_join_columns); @@ -995,6 +995,8 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain chain.addStep(); } + before_join = chain.getLastActions(); + ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); chain.steps.push_back(std::make_unique( syntax->analyzed_join, table_join, step.getResultColumns())); @@ -1762,8 +1764,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (query_analyzer.hasTableJoin()) { query_analyzer.appendJoinLeftKeys(chain, only_types || !first_stage); - before_join = chain.getLastActions(); - join = query_analyzer.appendJoin(chain, converting_join_columns); + join = query_analyzer.appendJoin(chain, converting_join_columns, before_join); chain.addStep(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 85efb3829d0..ef97fbf175a 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -377,8 +377,8 @@ private: /// Before aggregation: ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); - bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); - JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns); + void appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); + JoinPtr 
appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns, ActionsDAGPtr & before_join); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bfadc66352..cfb64366e9b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1251,15 +1251,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( - query_plan.getCurrentDataStream(), - expressions.before_join); - before_join_step->setStepDescription("Before JOIN"); - query_plan.addStep(std::move(before_join_step)); - } - /// Optional step to convert key columns to common supertype. if (expressions.converting_join_columns) { @@ -1270,6 +1261,15 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( + query_plan.getCurrentDataStream(), + expressions.before_join); + before_join_step->setStepDescription("Before JOIN"); + query_plan.addStep(std::move(before_join_step)); + } + if (expressions.hasJoin()) { if (expressions.join->isFilled()) diff --git a/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference b/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql b/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql new file mode 100644 index 00000000000..979a19340d0 --- /dev/null +++ b/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS e; + +-- https://github.com/ClickHouse/ClickHouse/issues/36891 + +CREATE TABLE e ( a UInt64, t DateTime ) ENGINE = MergeTree PARTITION BY toDate(t) ORDER BY tuple(); + +INSERT INTO e SELECT 1, toDateTime('2020-02-01 12:00:01') + INTERVAL number MONTH FROM numbers(10); + +SELECT any('1') +FROM e JOIN ( SELECT 1 :: UInt32 AS key) AS da ON key = a +PREWHERE toString(a) = '1'; + +-- SELECT sumIf( 1, if( 1, toDateTime('2020-01-01 00:00:00', 'UTC'), toDateTime('1970-01-01 00:00:00', 'UTC')) > t ) +-- FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a +-- WHERE t >= toDateTime('2021-07-19T13:00:00', 'UTC') AND t <= toDateTime('2021-07-19T13:59:59', 'UTC'); From a2214760c646aecf21d0203ef2eb7733e24acab2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 18 May 2022 22:01:14 +0200 Subject: [PATCH 239/615] Remove mark_cache_size from further files - changed config.xml/yaml files used by CH's own internal tests which are (hopefully) not sensitive to mark_cache_size being set or not - further occurrences exist but changing them seems a bad idea (e.g. 
because they are in customer-provided data) --- programs/server/embedded.xml | 1 - .../test_config_corresponding_root/configs/config.xml | 7 ------- tests/integration/test_config_xml_full/configs/config.xml | 6 ------ .../integration/test_config_xml_full/configs/embedded.xml | 1 - tests/integration/test_config_xml_main/configs/config.xml | 2 -- .../integration/test_config_xml_main/configs/embedded.xml | 1 - .../test_config_xml_yaml_mix/configs/config.xml | 2 -- .../test_config_xml_yaml_mix/configs/embedded.xml | 1 - .../integration/test_config_yaml_full/configs/config.yaml | 1 - .../integration/test_config_yaml_full/configs/embedded.xml | 1 - .../integration/test_config_yaml_main/configs/config.yaml | 1 - .../integration/test_config_yaml_main/configs/embedded.xml | 1 - .../configs/config.xml | 1 - .../configs/config.xml | 1 - .../test_dictionaries_dependency_xml/configs/config.xml | 1 - .../integration/test_dictionaries_mysql/configs/config.xml | 1 - .../test_dictionaries_null_value/configs/config.xml | 1 - .../test_dictionaries_select_all/configs/config.xml | 1 - .../test_dictionaries_update_and_reload/configs/config.xml | 1 - .../test_dictionaries_update_field/configs/config.xml | 1 - .../configs/config.xml | 1 - .../test_dictionary_custom_settings/configs/config.xml | 1 - tests/integration/test_disk_types/configs/config.xml | 1 - .../integration/test_https_replication/configs/config.xml | 7 ------- .../integration/test_join_set_family_s3/configs/config.xml | 1 - tests/integration/test_log_family_s3/configs/config.xml | 1 - .../test_logs_level/configs/config_information.xml | 1 - .../configs/config.xml | 1 - .../test_merge_tree_azure_blob_storage/configs/config.xml | 1 - tests/integration/test_merge_tree_s3/configs/config.xml | 1 - .../test_merge_tree_s3_failover/configs/config.xml | 1 - .../test_merge_tree_s3_restore/configs/config.xml | 1 - .../test_merge_tree_s3_with_cache/configs/config.xml | 1 - tests/integration/test_mysql_protocol/configs/config.xml | 1 - 
tests/integration/test_odbc_interaction/configs/config.xml | 1 - .../test_postgresql_protocol/configs/config.xml | 1 - .../integration/test_profile_events_s3/configs/config.xml | 1 - .../test_reload_auxiliary_zookeepers/configs/config.xml | 1 - .../test_reload_max_table_size_to_drop/configs/config.xml | 1 - tests/integration/test_reload_zookeeper/configs/config.xml | 1 - tests/integration/test_s3_with_https/configs/config.xml | 1 - tests/integration/test_s3_with_proxy/configs/config.xml | 1 - 42 files changed, 61 deletions(-) diff --git a/programs/server/embedded.xml b/programs/server/embedded.xml index ba0df99dfe0..2b6c4d9f770 100644 --- a/programs/server/embedded.xml +++ b/programs/server/embedded.xml @@ -13,7 +13,6 @@ ./ 8589934592 - 5368709120 true diff --git a/tests/integration/test_config_corresponding_root/configs/config.xml b/tests/integration/test_config_corresponding_root/configs/config.xml index e1a1c1c75df..914d4e6cb0e 100644 --- a/tests/integration/test_config_corresponding_root/configs/config.xml +++ b/tests/integration/test_config_corresponding_root/configs/config.xml @@ -101,13 +101,6 @@ --> 8589934592 - - 5368709120 - - /var/lib/clickhouse/ diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index f7a0afecac5..55e84568af1 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -304,12 +304,6 @@ --> 8589934592 - - 5368709120 - 8589934592 - - 5368709120 - - /var/lib/clickhouse/ diff --git a/tests/integration/test_join_set_family_s3/configs/config.xml b/tests/integration/test_join_set_family_s3/configs/config.xml index ca4bdf15b43..87270096ccd 100644 --- a/tests/integration/test_join_set_family_s3/configs/config.xml +++ b/tests/integration/test_join_set_family_s3/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git 
a/tests/integration/test_log_family_s3/configs/config.xml b/tests/integration/test_log_family_s3/configs/config.xml index ca4bdf15b43..87270096ccd 100644 --- a/tests/integration/test_log_family_s3/configs/config.xml +++ b/tests/integration/test_log_family_s3/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_logs_level/configs/config_information.xml b/tests/integration/test_logs_level/configs/config_information.xml index 9275a468990..17342b36c57 100644 --- a/tests/integration/test_logs_level/configs/config_information.xml +++ b/tests/integration/test_logs_level/configs/config_information.xml @@ -19,7 +19,6 @@ 500 - 5368709120 users.xml /etc/clickhouse-server/config.d/*.xml diff --git a/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml b/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml index 72801915721..58087e93882 100644 --- a/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml +++ b/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml @@ -12,6 +12,5 @@ /var/lib/clickhouse/ - 5368709120 users.xml diff --git a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml +++ b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3/configs/config.xml b/tests/integration/test_merge_tree_s3/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_s3/configs/config.xml +++ b/tests/integration/test_merge_tree_s3/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git 
a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_s3_restore/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml b/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml index ba0f02d1fa7..ccba76b71a6 100644 --- a/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml @@ -21,7 +21,6 @@ 500 - 0 ./clickhouse/ users.xml diff --git a/tests/integration/test_mysql_protocol/configs/config.xml b/tests/integration/test_mysql_protocol/configs/config.xml index 07f22392939..a2ba348eabd 100644 --- a/tests/integration/test_mysql_protocol/configs/config.xml +++ b/tests/integration/test_mysql_protocol/configs/config.xml @@ -29,7 +29,6 @@ 127.0.0.1 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_odbc_interaction/configs/config.xml b/tests/integration/test_odbc_interaction/configs/config.xml index e9d426b2f71..861c81248d7 100644 --- a/tests/integration/test_odbc_interaction/configs/config.xml +++ b/tests/integration/test_odbc_interaction/configs/config.xml @@ -28,7 +28,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_postgresql_protocol/configs/config.xml b/tests/integration/test_postgresql_protocol/configs/config.xml 
index 1dade9247b3..b3340627751 100644 --- a/tests/integration/test_postgresql_protocol/configs/config.xml +++ b/tests/integration/test_postgresql_protocol/configs/config.xml @@ -29,7 +29,6 @@ 127.0.0.1 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_profile_events_s3/configs/config.xml b/tests/integration/test_profile_events_s3/configs/config.xml index 665e85dfe2f..226e3e7efbe 100644 --- a/tests/integration/test_profile_events_s3/configs/config.xml +++ b/tests/integration/test_profile_events_s3/configs/config.xml @@ -29,7 +29,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml b/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml index 98f07de912d..d1c2e5958c8 100644 --- a/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml +++ b/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml b/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml index 98f07de912d..d1c2e5958c8 100644 --- a/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml +++ b/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_reload_zookeeper/configs/config.xml b/tests/integration/test_reload_zookeeper/configs/config.xml index 98f07de912d..d1c2e5958c8 100644 --- a/tests/integration/test_reload_zookeeper/configs/config.xml +++ b/tests/integration/test_reload_zookeeper/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_s3_with_https/configs/config.xml b/tests/integration/test_s3_with_https/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- 
a/tests/integration/test_s3_with_https/configs/config.xml +++ b/tests/integration/test_s3_with_https/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_s3_with_proxy/configs/config.xml b/tests/integration/test_s3_with_proxy/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_s3_with_proxy/configs/config.xml +++ b/tests/integration/test_s3_with_proxy/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml From b48cd13c07ae992a27036042d14568395866ed13 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 18 May 2022 22:25:34 +0200 Subject: [PATCH 240/615] Remove unused file --- cmake/generate_code.cmake | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 cmake/generate_code.cmake diff --git a/cmake/generate_code.cmake b/cmake/generate_code.cmake deleted file mode 100644 index 8eb9da24d1d..00000000000 --- a/cmake/generate_code.cmake +++ /dev/null @@ -1,5 +0,0 @@ -function(generate_code TEMPLATE_FILE) - foreach(NAME IN LISTS ARGN) - configure_file (${TEMPLATE_FILE}.cpp.in ${CMAKE_CURRENT_BINARY_DIR}/generated/${TEMPLATE_FILE}_${NAME}.cpp) - endforeach() -endfunction() From 1d89738c6adcc024517263f55debd9e13cd8b96a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 18 May 2022 22:43:51 +0200 Subject: [PATCH 241/615] Remove performance workaround for old GCC GCC support these days is experimental. GCCs main use is to keep the code portable but I don't think it's used for performance tests. Hence removing a performance workaround added in the GCC 7 days (we are now using 11, soon: 12). 
--- src/CMakeLists.txt | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1cbcd927216..c1bf685aec0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,18 +37,6 @@ if (USE_DEBUG_HELPERS) add_compile_options($<$:${INCLUDE_DEBUG_HELPERS}>) endif () -if (COMPILER_GCC) - # If we leave this optimization enabled, gcc-7 replaces a pair of SSE intrinsics (16 byte load, store) with a call to memcpy. - # It leads to slow code. This is compiler bug. It looks like this: - # - # (gdb) bt - #0 memcpy (destination=0x7faa6e9f1638, source=0x7faa81d9e9a8, size=16) at ../libs/libmemcpy/memcpy.h:11 - #1 0x0000000005341c5f in _mm_storeu_si128 (__B=..., __P=) at /usr/lib/gcc/x86_64-linux-gnu/7/include/emmintrin.h:720 - #2 memcpySmallAllowReadWriteOverflow15Impl (n=, src=, dst=) at ../src/Common/memcpySmall.h:37 - - add_definitions ("-fno-tree-loop-distribute-patterns") -endif () - # ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`). # If turned ON, this option defines such macro. 
# See `src/Common/TargetSpecific.h` From d6d1c22008bfbe8f81dfec2ba32d576bedc7f088 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 18 May 2022 16:57:21 -0400 Subject: [PATCH 242/615] better argument type checking --- src/Functions/FunctionHashID.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 45def95b8de..1eb090e5b45 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -47,7 +47,8 @@ public: if (arguments.size() < 1) throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName()); - if (!isUnsignedInteger(arguments[0].type)) + const auto & id_col = arguments[0]; + if (!isUnsignedInteger(id_col.type)) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must be unsigned integer, got {}", @@ -56,7 +57,9 @@ public: if (arguments.size() > 1) { - if (!isString(arguments[1].type)) + const auto & hash_col = arguments[1]; + const auto * hash_col_type_const = typeid_cast(hash_col.column.get()); + if (!isString(hash_col.type) || !hash_col_type_const) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of function {} must be String, got {}", @@ -66,7 +69,9 @@ public: if (arguments.size() > 2) { - if (!isUInt8(arguments[2].type)) + const auto & min_length_col = arguments[2]; + const auto * min_length_col_type_const = typeid_cast(min_length_col.column.get()); + if (!isUInt8(min_length_col.type) || !min_length_col_type_const) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument of function {} must be UInt8, got {}", @@ -76,7 +81,9 @@ public: if (arguments.size() > 3) { - if (!isString(arguments[3].type)) + const auto & alphabet_col = arguments[3]; + const auto * alphabet_col_type_const = typeid_cast(alphabet_col.column.get()); + if (!isString(alphabet_col.type) || !alphabet_col_type_const) throw Exception( 
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Fourth argument of function {} must be String, got {}", @@ -88,7 +95,7 @@ public: { throw Exception( ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, - "Function {} expect no more than three arguments (integer, salt, optional_alphabet), got {}", + "Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}", getName(), arguments.size()); } From da99b1b250922f155f015b149db3f04dbb72cdc8 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 18 May 2022 16:57:30 -0400 Subject: [PATCH 243/615] simplify hashing --- src/Functions/FunctionHashID.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 1eb090e5b45..95ca7d75583 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -147,12 +147,9 @@ public: auto col_res = ColumnString::create(); - std::string hashid; - for (size_t i = 0; i < input_rows_count; ++i) { - hashid.assign(hash.encode({numcolumn->getUInt(i)})); - col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1); + col_res->insert(hash.encode({numcolumn->getUInt(i)})); } return col_res; From d4abbf54b0f1ce77ee02174d34afa0e417571dcd Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 18 May 2022 23:21:07 +0200 Subject: [PATCH 244/615] Update 00170_s3_cache.sql --- tests/queries/1_stateful/00170_s3_cache.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/1_stateful/00170_s3_cache.sql b/tests/queries/1_stateful/00170_s3_cache.sql index 6549bcf5479..b03b2a16bf0 100644 --- a/tests/queries/1_stateful/00170_s3_cache.sql +++ b/tests/queries/1_stateful/00170_s3_cache.sql @@ -2,11 +2,9 @@ -- { echo } -SYSTEM DROP FILESYSTEM CACHE; - SET enable_filesystem_cache_on_write_operations=0; SET max_memory_usage='20G'; - +SYSTEM DROP FILESYSTEM CACHE; SELECT count() FROM test.hits_s3; SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 
0; SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; From 11a17997b3b4bdbd4e5b3179cb177a2f8c4622fe Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 18 May 2022 18:09:45 -0400 Subject: [PATCH 245/615] better const column checking --- src/Functions/FunctionHashID.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 95ca7d75583..6a55d0d8afe 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -58,8 +58,7 @@ public: if (arguments.size() > 1) { const auto & hash_col = arguments[1]; - const auto * hash_col_type_const = typeid_cast(hash_col.column.get()); - if (!isString(hash_col.type) || !hash_col_type_const) + if (!isString(hash_col.type) || !isColumnConst(*hash_col.column.get())) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of function {} must be String, got {}", @@ -70,8 +69,7 @@ public: if (arguments.size() > 2) { const auto & min_length_col = arguments[2]; - const auto * min_length_col_type_const = typeid_cast(min_length_col.column.get()); - if (!isUInt8(min_length_col.type) || !min_length_col_type_const) + if (!isUInt8(min_length_col.type) || !isColumnConst(*min_length_col.column.get())) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument of function {} must be UInt8, got {}", @@ -82,8 +80,7 @@ public: if (arguments.size() > 3) { const auto & alphabet_col = arguments[3]; - const auto * alphabet_col_type_const = typeid_cast(alphabet_col.column.get()); - if (!isString(alphabet_col.type) || !alphabet_col_type_const) + if (!isString(alphabet_col.type) || !isColumnConst(*alphabet_col.column.get())) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Fourth argument of function {} must be String, got {}", From c87638d2ba1a8c7593d3dfcb2fab8ccfa3e794de Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 18 May 2022 18:37:13 -0400 Subject: [PATCH 246/615] put hashid 
behind allow_experimental_hash_functions setting --- src/Core/Settings.h | 1 + src/Functions/FunctionHashID.h | 11 ++++++++++- tests/queries/0_stateless/02293_hashid.sql | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2e6d657698c..2b1c458b393 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -583,6 +583,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \ diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index 6a55d0d8afe..fbfb368bec7 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int SUPPORT_IS_DISABLED; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } @@ -32,7 +34,14 @@ class FunctionHashID : public IFunction public: static constexpr auto name = "hashid"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context) + { + if (!context->getSettingsRef().allow_experimental_hash_functions) + throw 
Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name); + + return std::make_shared(); + } String getName() const override { return name; } diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql index 43dd77bb196..145bd76ccbf 100644 --- a/tests/queries/0_stateless/02293_hashid.sql +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -1,3 +1,5 @@ +SET allow_experimental_hash_functions = 1; + select number, hashid(number) from system.numbers limit 5; select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; select hashid(1234567890123456, 's3cr3t'); From 00d3bbc2e022cd843492215e653eac95cdc8725d Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Wed, 18 May 2022 17:04:15 -0700 Subject: [PATCH 247/615] review fixes --- .../h3GetIndexesFromUnidirectionalEdge.cpp | 6 +++--- src/Functions/h3GetUnidirectionalEdgeBoundary.cpp | 4 ++-- .../h3GetUnidirectionalEdgesFromHexagon.cpp | 15 +++++++-------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp b/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp index e5f72deedef..91ab22aed29 100644 --- a/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp +++ b/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp @@ -79,10 +79,10 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 edge = data_hindex_edge[row]; - std::vector res; - // resize to 2 as directedEdgeToCells func sets the origin and + // allocate array of size 2 + // directedEdgeToCells func sets the origin and // destination at [0] and [1] of the input vector - res.resize(2); + std::array res; directedEdgeToCells(edge, res.data()); diff --git a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp index 9dd2d00609e..4cbdda2cb70 100644 --- 
a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp +++ b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp @@ -85,8 +85,8 @@ public: for (int vert = 0; vert < boundary.numVerts; ++vert) { - latitude->insert(radsToDegs(boundary.verts[vert].lat)); - longitude->insert(radsToDegs(boundary.verts[vert].lng)); + latitude->getData().push_back(radsToDegs(boundary.verts[vert].lat)); + longitude->getData().push_back(radsToDegs(boundary.verts[vert].lng)); } current_offset += boundary.numVerts; diff --git a/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp b/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp index 1fca0353b62..10e0a5d8cb2 100644 --- a/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp +++ b/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp @@ -76,26 +76,25 @@ public: result_offsets.resize(input_rows_count); auto current_offset = 0; - std::vector res_vec; result_data.reserve(input_rows_count); for (size_t row = 0; row < input_rows_count; ++row) { + // allocate array of size 6 + // originToDirectedEdges places 6 edges into + // array that's passed to it + std::array res; + const UInt64 edge = data_hindex_edge[row]; - // originToDirectedEdges places only 6 edges into - // res_vec that's passed - res_vec.resize(6); + originToDirectedEdges(edge, res.data()); - originToDirectedEdges(edge, res_vec.data()); - - for (auto & i : res_vec) + for (auto & i : res) { ++current_offset; result_data.emplace_back(i); } result_offsets[row] = current_offset; - res_vec.clear(); } return ColumnArray::create(std::move(result_column_data), std::move(result_column_offsets)); } From e0c14dfc012b0a9fcf342e1e994ec85aaa8d2689 Mon Sep 17 00:00:00 2001 From: Michael Nutt Date: Wed, 18 May 2022 19:40:28 -0400 Subject: [PATCH 248/615] fix includes --- docker/test/fasttest/run.sh | 1 + src/Functions/CMakeLists.txt | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 8692c8c64c5..3a660d9cf15 100755 
--- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -178,6 +178,7 @@ function clone_submodules contrib/replxx contrib/wyhash contrib/eigen + contrib/hashidsxx ) git submodule sync diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index eee3bfe9bd7..60386908f01 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -23,6 +23,7 @@ target_link_libraries(clickhouse_functions dbms ch_contrib::metrohash ch_contrib::murmurhash + ch_contrib::hashidsxx PRIVATE ch_contrib::zlib @@ -97,10 +98,6 @@ if (TARGET ch_contrib::rapidjson) target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson) endif() -if (TARGET ch_contrib::hashidsxx) - target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hashidsxx) -endif() - add_subdirectory(GatherUtils) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils) From 3e050547927998da338e881e53112d8c0174640c Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 19 May 2022 01:48:47 +0000 Subject: [PATCH 249/615] disable parallel test --- .../0_stateless/01065_window_view_event_hop_watch_bounded.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 9604632fca3..169c59cc5af 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys From 16513f2a10bef5c489323b4029ca2cc39cdb71ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 05:19:44 +0200 Subject: [PATCH 250/615] Add changelog for 22.5 --- CHANGELOG.md | 166 +++++++++++++++++++- utils/changelog-simple/README.md | 21 +++ utils/changelog-simple/changelog.sh | 96 ++++++++++++ utils/changelog-simple/format-changelog.py | 167 
+++++++++++++++++++++ utils/changelog/README.md | 13 +- utils/changelog/changelog.sh | 96 ++++++++++++ utils/changelog/format-changelog.py | 165 ++++++++++++++++++++ 7 files changed, 718 insertions(+), 6 deletions(-) create mode 100644 utils/changelog-simple/README.md create mode 100755 utils/changelog-simple/changelog.sh create mode 100755 utils/changelog-simple/format-changelog.py create mode 100755 utils/changelog/changelog.sh create mode 100755 utils/changelog/format-changelog.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c855995d6c..b6babf03c7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,175 @@ ### Table of Contents +**[ClickHouse release v22.5, 2022-05-19](#225)**
**[ClickHouse release v22.4, 2022-04-20](#224)**
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**
**[ClickHouse release v22.2, 2022-02-17](#222)**
**[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**
-### ClickHouse release master FIXME as compared to v22.3.3.44-lts +### ClickHouse release 22.5, 2022-05-19 + +#### Upgrade Notes + +* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put it in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. +* Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature + +* Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)).
+* Add support of GROUPING SETS in GROUP BY clause. This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)). +* Added `system.certificates` table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Implemented L1, L2, Linf, Cosine distance functions for arrays and L1, L2, Linf norm functions for arrays. + [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). +* Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)). +* New single binary based diagnostics tool (clickhouse-diagnostics). [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Add output format `Prometheus` [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). +* Add `MySQLDump` input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)). +* Window function `nth_value` is added. [#36601](https://github.com/ClickHouse/ClickHouse/pull/36601) ([Nikolay](https://github.com/ndchikin)). +* Show the `total_rows` and `total_bytes` fields in `system.tables` for temporary tables. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)). +* Allow to override `parts_to_delay_insert` and `parts_to_throw_insert` with query-level settings. 
If they are defined, they will override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)). + +#### Experimental Feature + +* Improve the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation (ctrl-c) faster, by checking `isCancelled()` more frequently. [#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)). +* Introspection for remote filesystem cache. [#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)). +* Added new hash function `wyHash64` for SQL. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)). +* Improvement for replicated databases: Added `SYSTEM SYNC DATABASE REPLICA` query which allows to sync tables metadata inside Replicated database, because currently synchronisation is asynchronous. [#35944](https://github.com/ClickHouse/ClickHouse/pull/35944) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improvement for remote filesystem cache: Better read from cache. [#37054](https://github.com/ClickHouse/ClickHouse/pull/37054) ([Kseniia Sumarokova](https://github.com/kssenii)). Improve `SYSTEM DROP FILESYSTEM CACHE` query: `<path>` option and `FORCE` option. [#36639](https://github.com/ClickHouse/ClickHouse/pull/36639) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improvement for semistructured data: Allow to cast columns of type `Object(...)` to `Object(Nullable(...))`. [#36564](https://github.com/ClickHouse/ClickHouse/pull/36564) ([awakeljw](https://github.com/awakeljw)). +* Improvement for parallel replicas: We create a local interpreter if we want to execute query on localhost replica. But when executing a query on multiple replicas we rely on the fact that a connection exists so replicas can talk to coordinator.
It is now improved and localhost replica can talk to coordinator directly in the same process. [#36281](https://github.com/ClickHouse/ClickHouse/pull/36281) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Performance Improvement + +* Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). +* Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Improve performance of reading from storage `File` and table functions `file` in case when path has globs and matched directory contains large number of files. [#36647](https://github.com/ClickHouse/ClickHouse/pull/36647) ([Anton Popov](https://github.com/CurtizJ)).
+* Apply parallel parsing for input format `HiveText`, which can speed up HiveText parsing by 2x when reading local file. [#36650](https://github.com/ClickHouse/ClickHouse/pull/36650) ([李扬](https://github.com/taiyang-li)). +* The default `HashJoin` is not thread safe for inserting right table's rows and run it in a single thread. When the right table is large, the join process is too slow with low cpu utilization. [#36415](https://github.com/ClickHouse/ClickHouse/pull/36415) ([lgbo](https://github.com/lgbo-ustc)). +* Allow to rewrite `select countDistinct(a) from t` to `select count(1) from (select a from t groupBy a)`. [#35993](https://github.com/ClickHouse/ClickHouse/pull/35993) ([zhanglistar](https://github.com/zhanglistar)). +* Transform OR LIKE chain to multiMatchAny. Will enable once we have more confidence it works. [#34932](https://github.com/ClickHouse/ClickHouse/pull/34932) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance of some functions with inlining. [#34544](https://github.com/ClickHouse/ClickHouse/pull/34544) ([Daniel Kutenin](https://github.com/danlark1)). +* Add a branch to avoid unnecessary memcpy in readBig. It improves performance somewhat. [#36095](https://github.com/ClickHouse/ClickHouse/pull/36095) ([jasperzhu](https://github.com/jinjunzh)). +* Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). + +#### Improvement + +* Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). +* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. 
[#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now date time conversion functions that generate time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). +* Add a warning if someone is running clickhouse-server with log level "test". The log level "test" was added recently and cannot be used in production due to inevitable, unavoidable, fatal and life-threatening performance degradation. [#36824](https://github.com/ClickHouse/ClickHouse/pull/36824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parse collations in CREATE TABLE, throw exception or ignore. Closes [#35892](https://github.com/ClickHouse/ClickHouse/issues/35892). [#36271](https://github.com/ClickHouse/ClickHouse/pull/36271) ([yuuch](https://github.com/yuuch)). +* Option `compatibility_ignore_auto_increment_in_create_table` allows ignoring `AUTO_INCREMENT` keyword in a column declaration to simplify migration from MySQL. [#37178](https://github.com/ClickHouse/ClickHouse/pull/37178) ([Igor Nikonov](https://github.com/devcrafter)). +* Add aliases `JSONLines` and `NDJSON` for `JSONEachRow`. Closes [#36303](https://github.com/ClickHouse/ClickHouse/issues/36303). [#36327](https://github.com/ClickHouse/ClickHouse/pull/36327) ([flynn](https://github.com/ucasfl)). +* Limit the maximum number of partitions that can be queried for each Hive table. Avoid resource overruns. [#37281](https://github.com/ClickHouse/ClickHouse/pull/37281) ([lgbo](https://github.com/lgbo-ustc)).
+* Added implicit cast for `h3kRing` function second argument to improve usability. Closes [#35432](https://github.com/ClickHouse/ClickHouse/issues/35432). [#37189](https://github.com/ClickHouse/ClickHouse/pull/37189) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix progress indication for `INSERT SELECT` in `clickhouse-local` for any query and for file progress in client, more correct file progress. [#37075](https://github.com/ClickHouse/ClickHouse/pull/37075) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug which can lead to forgotten outdated parts in MergeTree table engines family in case of filesystem failures during parts removal. Before fix they will be removed only after first server restart. [#37014](https://github.com/ClickHouse/ClickHouse/pull/37014) ([alesapin](https://github.com/alesapin)). +* Implemented a new mode of handling row policies which can be enabled in the main configuration which enables users without permissive row policies to read rows. [#36997](https://github.com/ClickHouse/ClickHouse/pull/36997) ([Vitaly Baranov](https://github.com/vitlibar)). +* Play UI: Nullable numbers will be aligned to the right in table cells. This closes [#36982](https://github.com/ClickHouse/ClickHouse/issues/36982). [#36988](https://github.com/ClickHouse/ClickHouse/pull/36988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Play UI: If there is one row in result and more than a few columns, display the result vertically. Continuation of [#36811](https://github.com/ClickHouse/ClickHouse/issues/36811). [#36842](https://github.com/ClickHouse/ClickHouse/pull/36842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup CSS in Play UI. The pixels are more evenly placed. Better usability for long content in table cells. [#36569](https://github.com/ClickHouse/ClickHouse/pull/36569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Finalize write buffers in case of exception to avoid doing it in destructors. 
Hope it fixes: [#36907](https://github.com/ClickHouse/ClickHouse/issues/36907). [#36979](https://github.com/ClickHouse/ClickHouse/pull/36979) ([Kruglov Pavel](https://github.com/Avogar)). +* After [#36425](https://github.com/ClickHouse/ClickHouse/issues/36425) settings like `background_fetches_pool_size` became obsolete and can appear in top level config, but clickhouse throws an exception like `Error updating configuration from '/etc/clickhouse-server/config.xml' config.: Code: 137. DB::Exception: A setting 'background_fetches_pool_size' appeared at top level in config /etc/clickhouse-server/config.xml.` This is fixed. [#36917](https://github.com/ClickHouse/ClickHouse/pull/36917) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add extra diagnostic info (if applicable) when sending exception to other server. [#36872](https://github.com/ClickHouse/ClickHouse/pull/36872) ([tavplubix](https://github.com/tavplubix)). +* Allow to execute hash functions with arguments of type `Array(Tuple(..))`. [#36812](https://github.com/ClickHouse/ClickHouse/pull/36812) ([Anton Popov](https://github.com/CurtizJ)). +* Added `user_defined_path` config setting. [#36753](https://github.com/ClickHouse/ClickHouse/pull/36753) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow cluster macro in `s3Cluster` table function. [#36726](https://github.com/ClickHouse/ClickHouse/pull/36726) ([Vadim Volodin](https://github.com/PolyProgrammist)). +* Properly cancel INSERT queries in `clickhouse-client`/`clickhouse-local`. [#36710](https://github.com/ClickHouse/ClickHouse/pull/36710) ([Azat Khuzhin](https://github.com/azat)). +* Allow to cancel a query while still keeping a decent query id in `MySQLHandler`. [#36699](https://github.com/ClickHouse/ClickHouse/pull/36699) ([Amos Bird](https://github.com/amosbird)). +* Add `is_all_data_sent` column into `system.processes`, and improve internal testing hardening check based on it.
[#36649](https://github.com/ClickHouse/ClickHouse/pull/36649) ([Azat Khuzhin](https://github.com/azat)). +* The metrics about time spent reading from s3 are now calculated correctly. Close [#35483](https://github.com/ClickHouse/ClickHouse/issues/35483). [#36572](https://github.com/ClickHouse/ClickHouse/pull/36572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow file descriptors in table function file if it is run in clickhouse-local. [#36562](https://github.com/ClickHouse/ClickHouse/pull/36562) ([wuxiaobai24](https://github.com/wuxiaobai24)). +* Allow names of tuple elements that start with digits. [#36544](https://github.com/ClickHouse/ClickHouse/pull/36544) ([Anton Popov](https://github.com/CurtizJ)). +* Now clickhouse-benchmark can read authentication info from environment variables. [#36497](https://github.com/ClickHouse/ClickHouse/pull/36497) ([Anton Kozlov](https://github.com/tonickkozlov)). +* `clickhouse-keeper` improvement: add support for force recovery which allows you to reconfigure cluster without quorum. [#36258](https://github.com/ClickHouse/ClickHouse/pull/36258) ([Antonio Andelic](https://github.com/antonio2368)). +* Improve schema inference for JSON objects. [#36207](https://github.com/ClickHouse/ClickHouse/pull/36207) ([Kruglov Pavel](https://github.com/Avogar)). +* Refactor code around schema inference with globs. Try next file from glob only if it makes sense (previously we tried next file in case of any error). Also it fixes [#36317](https://github.com/ClickHouse/ClickHouse/issues/36317). [#36205](https://github.com/ClickHouse/ClickHouse/pull/36205) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a separate `CLUSTER` grant (and `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, for backward compatibility, default to `false`). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)).
+* If the required amount of memory is available before the selected query stopped, all waiting queries continue execution. Now we don't stop any query if memory is freed before the moment when the selected query knows about the cancellation. [#35637](https://github.com/ClickHouse/ClickHouse/pull/35637) ([Dmitry Novik](https://github.com/novikd)). +* Nullables detection in protobuf. In proto3, default values are not sent on the wire. This makes it non-trivial to distinguish between null and default values for Nullable columns. A standard way to deal with this problem is to use Google wrappers to nest the target value within an inner message (see https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/wrappers.proto). In this case, a missing field is interpreted as null value, a field with missing value is interpreted as default value, and a field with regular value is interpreted as regular value. However, ClickHouse interprets Google wrappers as nested columns. We propose to introduce special behaviour to detect Google wrappers and interpret them like in the description above. For example, to serialize values for a Nullable column `test`, we would use `google.protobuf.StringValue test` in our .proto schema. Note that these types are so called "well-known types" in Protobuf, implemented in the library itself. [#35149](https://github.com/ClickHouse/ClickHouse/pull/35149) ([Jakub Kuklis](https://github.com/jkuklis)). +* Added support for specifying `content_type` in predefined and static HTTP handler config. [#34916](https://github.com/ClickHouse/ClickHouse/pull/34916) ([Roman Nikonov](https://github.com/nic11)). +* Warn properly if using clickhouse-client --file without preceding --external. Close [#34747](https://github.com/ClickHouse/ClickHouse/issues/34747). [#34765](https://github.com/ClickHouse/ClickHouse/pull/34765) ([李扬](https://github.com/taiyang-li)). +* Improve MySQL database engine to be compatible with binary(0) dataType.
[#37232](https://github.com/ClickHouse/ClickHouse/pull/37232) ([zzsmdfj](https://github.com/zzsmdfj)). +* Improve JSON report of clickhouse-benchmark. [#36473](https://github.com/ClickHouse/ClickHouse/pull/36473) ([Tian Xinhui](https://github.com/xinhuitian)). +* Server might refuse to start if it cannot resolve hostname of external ClickHouse dictionary. It's fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([tavplubix](https://github.com/tavplubix)). + +#### Build/Testing/Packaging Improvement + +* Now `clickhouse-keeper` for the `x86_64` architecture is statically linked with [musl](https://musl.libc.org/) and doesn't depend on any system libraries. [#31833](https://github.com/ClickHouse/ClickHouse/pull/31833) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse builds for `PowerPC64LE` architecture are now available in universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Limit PowerPC code generation to Power8 for better compatibility. This closes [#36025](https://github.com/ClickHouse/ClickHouse/issues/36025). [#36529](https://github.com/ClickHouse/ClickHouse/pull/36529) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simplify performance test. This will give a chance for us to use it. [#36769](https://github.com/ClickHouse/ClickHouse/pull/36769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail performance comparison on errors in the report. [#34797](https://github.com/ClickHouse/ClickHouse/pull/34797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add ZSTD support for Arrow. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). 
[#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)). + +#### Bug Fix + +* Extracts Version ID if present from the URI and adds a request to the AWS HTTP URI. Closes [#31221](https://github.com/ClickHouse/ClickHouse/issues/31221). - [x] Extract `Version ID` from URI if present and reassemble without it. - [x] Configure `AWS HTTP URI` object with request. - [x] Unit Tests: [`gtest_s3_uri`](https://github.com/ClickHouse/ClickHouse/blob/2340a6c6849ebc05a8efbf97ba8de3ff9dc0eff4/src/IO/tests/gtest_s3_uri.cpp) - [x] Drop instrumentation commit. [#34571](https://github.com/ClickHouse/ClickHouse/pull/34571) ([Saad Ur Rahman](https://github.com/surahman)). +* Fix system.opentelemetry_span_log attribute.values alias to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)). +* Fix Nullable(String) to Nullable(Bool/IPv4/IPv6) conversion. Closes [#37221](https://github.com/ClickHouse/ClickHouse/issues/37221). [#37270](https://github.com/ClickHouse/ClickHouse/pull/37270) ([Kruglov Pavel](https://github.com/Avogar)). +* Experimental feature: Fix execution of mutations in tables, in which there exist columns of type `Object`. Using subcolumns of type `Object` in `WHERE` expression of `UPDATE` or `DELETE` queries is not allowed yet, as well as manipulating (`DROP`, `MODIFY`) of separate subcolumns. Fixes [#37205](https://github.com/ClickHouse/ClickHouse/issues/37205). [#37266](https://github.com/ClickHouse/ClickHouse/pull/37266) ([Anton Popov](https://github.com/CurtizJ)). +* Kafka does not need `group.id` on producer stage. In console log you can find a warning that describes this issue: ``` 2022.05.15 17:59:13.270227 [ 137 ] {} StorageKafka (topic-name): [rdk:CONFWARN] [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance ```.
[#37228](https://github.com/ClickHouse/ClickHouse/pull/37228) ([Mark Andreev](https://github.com/mrk-andreev)). +* Experimental feature (WindowView): Update `max_fired_watermark ` after blocks actually fired, in case delete data that hasn't been fired yet. [#37225](https://github.com/ClickHouse/ClickHouse/pull/37225) ([vxider](https://github.com/Vxider)). +* Fix "Cannot create column of type Set" for distributed queries with LIMIT BY. [#37193](https://github.com/ClickHouse/ClickHouse/pull/37193) ([Azat Khuzhin](https://github.com/azat)). +* Experimental feature: Now WindowView `WATCH EVENTS` query will not be terminated due to the nonempty Chunk created in `WindowViewSource.h:58`. [#37182](https://github.com/ClickHouse/ClickHouse/pull/37182) ([vxider](https://github.com/Vxider)). +* Enable `enable_global_with_statement` for subqueries, close [#37141](https://github.com/ClickHouse/ClickHouse/issues/37141). [#37166](https://github.com/ClickHouse/ClickHouse/pull/37166) ([Vladimir C](https://github.com/vdimir)). +* Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)). +* The ILIKE function on FixedString columns could have returned wrong results (i.e. match less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)). +* Experimental feature: Fix optimize_aggregation_in_order with prefix GROUP BY and *Array aggregate functions. [#37050](https://github.com/ClickHouse/ClickHouse/pull/37050) ([Azat Khuzhin](https://github.com/azat)). +* Fixed performance degradation of some INSERT SELECT queries with implicit aggregation. 
Fixes [#36792](https://github.com/ClickHouse/ClickHouse/issues/36792). [#37047](https://github.com/ClickHouse/ClickHouse/pull/37047) ([tavplubix](https://github.com/tavplubix)). +* Experimental feature: Fix in-order `GROUP BY` (`optimize_aggregation_in_order=1`) with `*Array` (`groupArrayArray`/...) aggregate functions. [#37046](https://github.com/ClickHouse/ClickHouse/pull/37046) ([Azat Khuzhin](https://github.com/azat)). +* Fix LowCardinality->ArrowDictionary invalid output when type of indexes is not UInt8. Closes [#36832](https://github.com/ClickHouse/ClickHouse/issues/36832). [#37043](https://github.com/ClickHouse/ClickHouse/pull/37043) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed problem with infs in `quantileTDigest`. Fixes [#32107](https://github.com/ClickHouse/ClickHouse/issues/32107). [#37021](https://github.com/ClickHouse/ClickHouse/pull/37021) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix sending external tables data in HedgedConnections with max_parallel_replicas != 1. [#36981](https://github.com/ClickHouse/ClickHouse/pull/36981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed logical error on `TRUNCATE` query in `Replicated` database. Fixes [#33747](https://github.com/ClickHouse/ClickHouse/issues/33747). [#36976](https://github.com/ClickHouse/ClickHouse/pull/36976) ([tavplubix](https://github.com/tavplubix)). +* Experimental feature: Fix stuck when dropping source table in WindowView. Closes [#35678](https://github.com/ClickHouse/ClickHouse/issues/35678). [#36967](https://github.com/ClickHouse/ClickHouse/pull/36967) ([vxider](https://github.com/Vxider)). +* Experimental feature (rocksdb cache): Fix issue: [#36671](https://github.com/ClickHouse/ClickHouse/issues/36671). [#36929](https://github.com/ClickHouse/ClickHouse/pull/36929) ([李扬](https://github.com/taiyang-li)). 
+* Experimental feature: Fix bugs when using multiple columns in WindowView by adding converting actions to make it possible to call`writeIntoWindowView` with a slightly different schema. [#36928](https://github.com/ClickHouse/ClickHouse/pull/36928) ([vxider](https://github.com/Vxider)). +* Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). +* Fix incorrect query result when doing constant aggregation. This fixes [#36728](https://github.com/ClickHouse/ClickHouse/issues/36728) . [#36888](https://github.com/ClickHouse/ClickHouse/pull/36888) ([Amos Bird](https://github.com/amosbird)). +* Experimental feature: Fix `current_size` count in cache. [#36887](https://github.com/ClickHouse/ClickHouse/pull/36887) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Experimental feature: Fix fire in window view with hop window [#34044](https://github.com/ClickHouse/ClickHouse/issues/34044). [#36861](https://github.com/ClickHouse/ClickHouse/pull/36861) ([vxider](https://github.com/Vxider)). +* Experimental feature: Fix incorrect cast in cached buffer from remote fs. [#36809](https://github.com/ClickHouse/ClickHouse/pull/36809) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix creation of tables with `flatten_nested = 0`. Previously unflattened `Nested` columns could be flattened after server restart. [#36803](https://github.com/ClickHouse/ClickHouse/pull/36803) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some issues with async reads from remote filesystem which happened when reading low cardinality. [#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Experimental feature: Fix insertion to columns of type `Object` from multiple files, e.g. via table function `file` with globs. 
[#36762](https://github.com/ClickHouse/ClickHouse/pull/36762) ([Anton Popov](https://github.com/CurtizJ)). +* Fix timeouts in Hedged requests. Connection hang right after sending remote query could lead to eternal waiting. [#36749](https://github.com/ClickHouse/ClickHouse/pull/36749) ([Kruglov Pavel](https://github.com/Avogar)). +* Experimental feature: Fix a bug of `groupBitmapAndState`/`groupBitmapOrState`/`groupBitmapXorState` on distributed table. [#36739](https://github.com/ClickHouse/ClickHouse/pull/36739) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Experimental feature: During the [test](https://s3.amazonaws.com/clickhouse-test-reports/36376/1cb1c7275cb53769ab826772db9b71361bb3e413/stress_test__thread__actions_/clickhouse-server.clean.log) in [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that the one cache class was initialized twice, and it throws an exception. Although the cause of this problem is not clear, there should be code logic of repeatedly loading disk in ClickHouse, so we need to make special judgment for this situation. [#36737](https://github.com/ClickHouse/ClickHouse/pull/36737) ([Han Shukai](https://github.com/KinderRiven)). +* Fix vertical merges in wide parts. Previously an exception `There is no column` could be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). +* Fix server reload on port change (do not wait for current connections from query context). [#36700](https://github.com/ClickHouse/ClickHouse/pull/36700) ([Azat Khuzhin](https://github.com/azat)). +* Experimental feature: In the previous [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that testing (stateless tests, flaky check (address, actions)) timed out. Moreover, testing locally can also trigger unstable system deadlocks. This problem still exists when using the latest source code of master.
[#36697](https://github.com/ClickHouse/ClickHouse/pull/36697) ([Han Shukai](https://github.com/KinderRiven)). +* Experimental feature: Fix server restart if cache configuration changed. [#36685](https://github.com/ClickHouse/ClickHouse/pull/36685) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible heap-use-after-free in schema inference. Closes [#36661](https://github.com/ClickHouse/ClickHouse/issues/36661). [#36679](https://github.com/ClickHouse/ClickHouse/pull/36679) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed parsing of query settings in `CREATE` query when engine is not specified. Fixes https://github.com/ClickHouse/ClickHouse/pull/34187#issuecomment-1103812419. [#36642](https://github.com/ClickHouse/ClickHouse/pull/36642) ([tavplubix](https://github.com/tavplubix)). +* Experimental feature: Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). +* Fix format crash when default expression follow EPHEMERAL not literal. Closes [#36618](https://github.com/ClickHouse/ClickHouse/issues/36618). [#36633](https://github.com/ClickHouse/ClickHouse/pull/36633) ([flynn](https://github.com/ucasfl)). +* Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). +* Fix offset update ReadBufferFromEncryptedFile, which could cause undefined behaviour. [#36493](https://github.com/ClickHouse/ClickHouse/pull/36493) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix hostname sanity checks for Keeper cluster configuration. 
Add `keeper_server.host_checks_enabled` config to enable/disable those checks. [#36492](https://github.com/ClickHouse/ClickHouse/pull/36492) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix usage of executable user defined functions in GROUP BY. Previously, executable user defined functions could not be used as expressions in GROUP BY. Closes [#36448](https://github.com/ClickHouse/ClickHouse/issues/36448). [#36486](https://github.com/ClickHouse/ClickHouse/pull/36486) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible exception with unknown packet from server in client. [#36481](https://github.com/ClickHouse/ClickHouse/pull/36481) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Experimental feature (please never use `system.session_log`, it is going to be removed): Add missing enum values in system.session_log table. Closes [#36474](https://github.com/ClickHouse/ClickHouse/issues/36474). [#36480](https://github.com/ClickHouse/ClickHouse/pull/36480) ([Memo](https://github.com/Joeywzr)). +* Fix bug in s3Cluster schema inference that led to the fact that not all data was read in the select from s3Cluster. The bug appeared in https://github.com/ClickHouse/ClickHouse/pull/35544. [#36434](https://github.com/ClickHouse/ClickHouse/pull/36434) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416). This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). +* Fix dictionary reload for `ClickHouseDictionarySource` if it contains scalar subqueries. [#36390](https://github.com/ClickHouse/ClickHouse/pull/36390) ([lthaooo](https://github.com/lthaooo)). +* Fix assertion in JOIN, close [#36199](https://github.com/ClickHouse/ClickHouse/issues/36199).
[#36201](https://github.com/ClickHouse/ClickHouse/pull/36201) ([Vladimir C](https://github.com/vdimir)). +* Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Experimental feature: Fix insertion of complex JSONs with nested arrays to columns of type `Object`. [#36077](https://github.com/ClickHouse/ClickHouse/pull/36077) ([Anton Popov](https://github.com/CurtizJ)). +* Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). +* Fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). + + +### ClickHouse release 22.4, 2022-04-19 #### Backward Incompatible Change diff --git a/utils/changelog-simple/README.md b/utils/changelog-simple/README.md new file mode 100644 index 00000000000..cd8f8da9b61 --- /dev/null +++ b/utils/changelog-simple/README.md @@ -0,0 +1,21 @@ +## How To Generate Changelog + +Generate github token: +* https://github.com/settings/tokens - keep all checkboxes unchecked, no scopes need to be enabled. + +Dependencies: +``` +sudo apt-get install git curl jq python3 python3-fuzzywuzzy +``` + +Update information about tags: +``` +git fetch --tags +``` + +Usage example: + +``` +export GITHUB_USER=... GITHUB_TOKEN=ghp_... 
+./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable +``` diff --git a/utils/changelog-simple/changelog.sh b/utils/changelog-simple/changelog.sh new file mode 100755 index 00000000000..52817acfae4 --- /dev/null +++ b/utils/changelog-simple/changelog.sh @@ -0,0 +1,96 @@ +#!/bin/bash +set -e + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +from="$1" +to="$2" +log_command=(git log "$from..$to" --first-parent) + +"${log_command[@]}" > "changelog-log.txt" + +# Check for diamond merges. +if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' +then + # DO NOT ADD automated handling of diamond merges to this script. + # It is an unsustainable way to work with git, and it MUST be visible. + echo Warning: suspected diamond merges above. + echo Some commits will be missed, review these manually. +fi + +# Search for PR numbers in commit messages. First variant is normal merge, and second +# variant is squashed. Next are some backport message variants. +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*(#\([[:digit:]]\+\))$/\1/p; + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") + +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. +"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" + +echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." +if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi + +function github_download() +{ + local url=${1} + local file=${2} + if ! [ -f "$file" ] + then + echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" + + if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ + -sSf "$url" \ + > "$file" + then + >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." 
+ rm "$file" + return 1 + fi + sleep 0.1 + fi +} + +rm changelog-prs-filtered.txt &> /dev/null ||: +for pr in $(cat "changelog-prs.txt") +do + # Download PR info from github. + file="pr$pr.json" + github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue + + if ! [ "$pr" == "$(jq -r .number "$file")" ] + then + >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." + continue + fi + + # Filter out PRs by bots. + user_login=$(jq -r .user.login "$file") + + filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) + filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) + + if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] + then + continue + fi + + # Download author info from github. + user_id=$(jq -r .user.id "$file") + user_file="user$user_id.json" + github_download "$(jq -r .user.url "$file")" "$user_file" || continue + + if ! [ "$user_id" == "$(jq -r .id "$user_file")" ] + then + >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." 
+ continue + fi + + echo "$pr" >> changelog-prs-filtered.txt +done + +echo "### ClickHouse release $to FIXME as compared to $from +" > changelog.md +"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md +cat changelog.md diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py new file mode 100755 index 00000000000..2a407c23965 --- /dev/null +++ b/utils/changelog-simple/format-changelog.py @@ -0,0 +1,167 @@ +#!/usr/bin/python3 + +import argparse +import collections +import fuzzywuzzy.fuzz +import itertools +import json +import os +import re +import sys + +parser = argparse.ArgumentParser(description="Format changelog for given PRs.") +parser.add_argument( + "file", + metavar="FILE", + type=argparse.FileType("r", encoding="utf-8"), + nargs="?", + default=sys.stdin, + help="File with PR numbers, one per line.", +) +args = parser.parse_args() + +# This function mirrors the PR description checks in ClickhousePullRequestTrigger. +# Returns False if the PR should not be mentioned changelog. +def parse_one_pull_request(item): + description = item["body"] + # Don't skip empty lines because they delimit parts of description + lines = [ + line + for line in [ + x.strip() for x in (description.split("\n") if description else []) + ] + ] + lines = [re.sub(r"\s+", " ", l) for l in lines] + + category = "" + entry = "" + + if lines: + i = 0 + while i < len(lines): + if re.match(r"(?i).*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + elif re.match( + r"(?i).*change\s*log\s*entry", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. Filter it out. 
+ if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + else: + i += 1 + + + if not category: + # Shouldn't happen, because description check in CI should catch such PRs. + # Fall through, so that it shows up in output and the user can fix it. + category = "NO CL CATEGORY" + + # Filter out the PR categories that are not for changelog. + if re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + return False + + if not entry: + # Shouldn't happen, because description check in CI should catch such PRs. + category = "NO CL ENTRY" + entry = "NO CL ENTRY: '" + item["title"] + "'" + + entry = entry.strip() + if entry[-1] != ".": + entry += "." + + item["entry"] = entry + item["category"] = category + + return True + + +# This array gives the preferred category order, and is also used to +# normalize category names. 
+categories_preferred_order = [ + "Backward Incompatible Change", + "New Feature", + "Performance Improvement", + "Improvement", + "Bug Fix", + "Build/Testing/Packaging Improvement", + "Other", +] + +category_to_pr = collections.defaultdict(lambda: []) +users = {} +for line in args.file: + pr = json.loads(open(f"pr{line.strip()}.json").read()) + assert pr["number"] + if not parse_one_pull_request(pr): + continue + + assert pr["category"] + + # Normalize category name + for c in categories_preferred_order: + if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: + pr["category"] = c + break + + category_to_pr[pr["category"]].append(pr) + user_id = pr["user"]["id"] + users[user_id] = json.loads(open(f"user{user_id}.json").read()) + + +def print_category(category): + print(("#### " + category)) + print() + for pr in category_to_pr[category]: + user = users[pr["user"]["id"]] + user_name = user["name"] if user["name"] else user["login"] + + # Substitute issue links. + # 1) issue number w/o markdown link + pr["entry"] = re.sub( + r"([^[])#([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + pr["entry"], + ) + # 2) issue URL w/o markdown link + pr["entry"] = re.sub( + r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + pr["entry"], + ) + + print( + f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' 
+ ) + + print() + + +# Print categories in preferred order +for category in categories_preferred_order: + if category in category_to_pr: + print_category(category) + category_to_pr.pop(category) + +# Print the rest of the categories +for category in category_to_pr: + print_category(category) diff --git a/utils/changelog/README.md b/utils/changelog/README.md index 8218af83d96..cd8f8da9b61 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -5,14 +5,17 @@ Generate github token: Dependencies: ``` -sudo apt-get update -sudo apt-get install git python3 python3-fuzzywuzzy python3-github -python3 changelog.py -h +sudo apt-get install git curl jq python3 python3-fuzzywuzzy +``` + +Update information about tags: +``` +git fetch --tags ``` Usage example: ``` -python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$GITHUB_TOKEN" v21.6.2.7-prestable -python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$USER" --gh-password="$PASSWORD" v21.6.2.7-prestable +export GITHUB_USER=... GITHUB_TOKEN=ghp_... +./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable ``` diff --git a/utils/changelog/changelog.sh b/utils/changelog/changelog.sh new file mode 100755 index 00000000000..52817acfae4 --- /dev/null +++ b/utils/changelog/changelog.sh @@ -0,0 +1,96 @@ +#!/bin/bash +set -e + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +from="$1" +to="$2" +log_command=(git log "$from..$to" --first-parent) + +"${log_command[@]}" > "changelog-log.txt" + +# Check for diamond merges. +if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' +then + # DO NOT ADD automated handling of diamond merges to this script. + # It is an unsustainable way to work with git, and it MUST be visible. + echo Warning: suspected diamond merges above. + echo Some commits will be missed, review these manually. +fi + +# Search for PR numbers in commit messages. 
First variant is normal merge, and second +# variant is squashed. Next are some backport message variants. +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*(#\([[:digit:]]\+\))$/\1/p; + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") + +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. +"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" + +echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." +if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi + +function github_download() +{ + local url=${1} + local file=${2} + if ! [ -f "$file" ] + then + echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" + + if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ + -sSf "$url" \ + > "$file" + then + >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." + rm "$file" + return 1 + fi + sleep 0.1 + fi +} + +rm changelog-prs-filtered.txt &> /dev/null ||: +for pr in $(cat "changelog-prs.txt") +do + # Download PR info from github. + file="pr$pr.json" + github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue + + if ! [ "$pr" == "$(jq -r .number "$file")" ] + then + >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." + continue + fi + + # Filter out PRs by bots. + user_login=$(jq -r .user.login "$file") + + filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) + filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) + + if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] + then + continue + fi + + # Download author info from github. 
+ user_id=$(jq -r .user.id "$file") + user_file="user$user_id.json" + github_download "$(jq -r .user.url "$file")" "$user_file" || continue + + if ! [ "$user_id" == "$(jq -r .id "$user_file")" ] + then + >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." + continue + fi + + echo "$pr" >> changelog-prs-filtered.txt +done + +echo "### ClickHouse release $to FIXME as compared to $from +" > changelog.md +"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md +cat changelog.md diff --git a/utils/changelog/format-changelog.py b/utils/changelog/format-changelog.py new file mode 100755 index 00000000000..ef1340d48dd --- /dev/null +++ b/utils/changelog/format-changelog.py @@ -0,0 +1,165 @@ +#!/usr/bin/python3 + +import argparse +import collections +import fuzzywuzzy.fuzz +import itertools +import json +import os +import re +import sys + +parser = argparse.ArgumentParser(description="Format changelog for given PRs.") +parser.add_argument( + "file", + metavar="FILE", + type=argparse.FileType("r", encoding="utf-8"), + nargs="?", + default=sys.stdin, + help="File with PR numbers, one per line.", +) +args = parser.parse_args() + +# This function mirrors the PR description checks in ClickhousePullRequestTrigger. +# Returns False if the PR should not be mentioned changelog. +def parse_one_pull_request(item): + description = item["body"] + # Don't skip empty lines because they delimit parts of description + lines = [ + line + for line in [ + x.strip() for x in (description.split("\n") if description else []) + ] + ] + lines = [re.sub(r"\s+", " ", l) for l in lines] + + category = "" + entry = "" + + if lines: + i = 0 + while i < len(lines): + if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category itself. Filter it out. 
+ if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + elif re.match( + r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + else: + i += 1 + + if not category: + # Shouldn't happen, because description check in CI should catch such PRs. + # Fall through, so that it shows up in output and the user can fix it. + category = "NO CL CATEGORY" + + # Filter out the PR categories that are not for changelog. + if re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + return False + + if not entry: + # Shouldn't happen, because description check in CI should catch such PRs. + category = "NO CL ENTRY" + entry = "NO CL ENTRY: '" + item["title"] + "'" + + entry = entry.strip() + if entry[-1] != ".": + entry += "." + + item["entry"] = entry + item["category"] = category + + return True + + +# This array gives the preferred category order, and is also used to +# normalize category names. 
+categories_preferred_order = [ + "Backward Incompatible Change", + "New Feature", + "Performance Improvement", + "Improvement", + "Bug Fix", + "Build/Testing/Packaging Improvement", + "Other", +] + +category_to_pr = collections.defaultdict(lambda: []) +users = {} +for line in args.file: + pr = json.loads(open(f"pr{line.strip()}.json").read()) + assert pr["number"] + if not parse_one_pull_request(pr): + continue + + assert pr["category"] + + # Normalize category name + for c in categories_preferred_order: + if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: + pr["category"] = c + break + + category_to_pr[pr["category"]].append(pr) + user_id = pr["user"]["id"] + users[user_id] = json.loads(open(f"user{user_id}.json").read()) + + +def print_category(category): + print(("#### " + category)) + print() + for pr in category_to_pr[category]: + user = users[pr["user"]["id"]] + user_name = user["name"] if user["name"] else user["login"] + + # Substitute issue links. + # 1) issue number w/o markdown link + pr["entry"] = re.sub( + r"([^[])#([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + pr["entry"], + ) + # 2) issue URL w/o markdown link + pr["entry"] = re.sub( + r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + pr["entry"], + ) + + print( + f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' 
+ ) + + print() + + +# Print categories in preferred order +for category in categories_preferred_order: + if category in category_to_pr: + print_category(category) + category_to_pr.pop(category) + +# Print the rest of the categories +for category in category_to_pr: + print_category(category) From bb431e0a981f782e5563acb24ed1cda5d19405df Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 05:21:51 +0200 Subject: [PATCH 251/615] Revert README.md --- utils/changelog/README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/utils/changelog/README.md b/utils/changelog/README.md index cd8f8da9b61..8218af83d96 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -5,17 +5,14 @@ Generate github token: Dependencies: ``` -sudo apt-get install git curl jq python3 python3-fuzzywuzzy -``` - -Update information about tags: -``` -git fetch --tags +sudo apt-get update +sudo apt-get install git python3 python3-fuzzywuzzy python3-github +python3 changelog.py -h ``` Usage example: ``` -export GITHUB_USER=... GITHUB_TOKEN=ghp_... 
-./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable +python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$GITHUB_TOKEN" v21.6.2.7-prestable +python3 changelog.py --output=changelog-v22.4.1.2305-prestable.md --gh-user-or-token="$USER" --gh-password="$PASSWORD" v21.6.2.7-prestable ``` From 08d01e0b41ba52e357140cd59612e41f8ea0cebb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 05:22:28 +0200 Subject: [PATCH 252/615] Revert unrelated changes --- utils/changelog/changelog.sh | 96 ---------------- utils/changelog/format-changelog.py | 165 ---------------------------- 2 files changed, 261 deletions(-) delete mode 100755 utils/changelog/changelog.sh delete mode 100755 utils/changelog/format-changelog.py diff --git a/utils/changelog/changelog.sh b/utils/changelog/changelog.sh deleted file mode 100755 index 52817acfae4..00000000000 --- a/utils/changelog/changelog.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -set -e - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -from="$1" -to="$2" -log_command=(git log "$from..$to" --first-parent) - -"${log_command[@]}" > "changelog-log.txt" - -# Check for diamond merges. -if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' -then - # DO NOT ADD automated handling of diamond merges to this script. - # It is an unsustainable way to work with git, and it MUST be visible. - echo Warning: suspected diamond merges above. - echo Some commits will be missed, review these manually. -fi - -# Search for PR numbers in commit messages. First variant is normal merge, and second -# variant is squashed. Next are some backport message variants. 
-find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") - -# awk is to filter out small task numbers from different task tracker, which are -# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. -"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" - -echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." -if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi - -function github_download() -{ - local url=${1} - local file=${2} - if ! [ -f "$file" ] - then - echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" - - if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ - -sSf "$url" \ - > "$file" - then - >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." - rm "$file" - return 1 - fi - sleep 0.1 - fi -} - -rm changelog-prs-filtered.txt &> /dev/null ||: -for pr in $(cat "changelog-prs.txt") -do - # Download PR info from github. - file="pr$pr.json" - github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue - - if ! [ "$pr" == "$(jq -r .number "$file")" ] - then - >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." - continue - fi - - # Filter out PRs by bots. - user_login=$(jq -r .user.login "$file") - - filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) - filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) - - if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] - then - continue - fi - - # Download author info from github. - user_id=$(jq -r .user.id "$file") - user_file="user$user_id.json" - github_download "$(jq -r .user.url "$file")" "$user_file" || continue - - if ! 
[ "$user_id" == "$(jq -r .id "$user_file")" ] - then - >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." - continue - fi - - echo "$pr" >> changelog-prs-filtered.txt -done - -echo "### ClickHouse release $to FIXME as compared to $from -" > changelog.md -"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md -cat changelog.md diff --git a/utils/changelog/format-changelog.py b/utils/changelog/format-changelog.py deleted file mode 100755 index ef1340d48dd..00000000000 --- a/utils/changelog/format-changelog.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import collections -import fuzzywuzzy.fuzz -import itertools -import json -import os -import re -import sys - -parser = argparse.ArgumentParser(description="Format changelog for given PRs.") -parser.add_argument( - "file", - metavar="FILE", - type=argparse.FileType("r", encoding="utf-8"), - nargs="?", - default=sys.stdin, - help="File with PR numbers, one per line.", -) -args = parser.parse_args() - -# This function mirrors the PR description checks in ClickhousePullRequestTrigger. -# Returns False if the PR should not be mentioned changelog. -def parse_one_pull_request(item): - description = item["body"] - # Don't skip empty lines because they delimit parts of description - lines = [ - line - for line in [ - x.strip() for x in (description.split("\n") if description else []) - ] - ] - lines = [re.sub(r"\s+", " ", l) for l in lines] - - category = "" - entry = "" - - if lines: - i = 0 - while i < len(lines): - if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category itself. Filter it out. 
- if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - elif re.match( - r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - else: - i += 1 - - if not category: - # Shouldn't happen, because description check in CI should catch such PRs. - # Fall through, so that it shows up in output and the user can fix it. - category = "NO CL CATEGORY" - - # Filter out the PR categories that are not for changelog. - if re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - return False - - if not entry: - # Shouldn't happen, because description check in CI should catch such PRs. - category = "NO CL ENTRY" - entry = "NO CL ENTRY: '" + item["title"] + "'" - - entry = entry.strip() - if entry[-1] != ".": - entry += "." - - item["entry"] = entry - item["category"] = category - - return True - - -# This array gives the preferred category order, and is also used to -# normalize category names. 
-categories_preferred_order = [ - "Backward Incompatible Change", - "New Feature", - "Performance Improvement", - "Improvement", - "Bug Fix", - "Build/Testing/Packaging Improvement", - "Other", -] - -category_to_pr = collections.defaultdict(lambda: []) -users = {} -for line in args.file: - pr = json.loads(open(f"pr{line.strip()}.json").read()) - assert pr["number"] - if not parse_one_pull_request(pr): - continue - - assert pr["category"] - - # Normalize category name - for c in categories_preferred_order: - if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: - pr["category"] = c - break - - category_to_pr[pr["category"]].append(pr) - user_id = pr["user"]["id"] - users[user_id] = json.loads(open(f"user{user_id}.json").read()) - - -def print_category(category): - print(("#### " + category)) - print() - for pr in category_to_pr[category]: - user = users[pr["user"]["id"]] - user_name = user["name"] if user["name"] else user["login"] - - # Substitute issue links. - # 1) issue number w/o markdown link - pr["entry"] = re.sub( - r"([^[])#([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - # 2) issue URL w/o markdown link - pr["entry"] = re.sub( - r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - - print( - f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' 
- ) - - print() - - -# Print categories in preferred order -for category in categories_preferred_order: - if category in category_to_pr: - print_category(category) - category_to_pr.pop(category) - -# Print the rest of the categories -for category in category_to_pr: - print_category(category) From 54a6aaef621b88141e463dfcccae420945c4cec2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 17 May 2022 14:32:32 +0300 Subject: [PATCH 253/615] Log query processing stage in executeQuery() Signed-off-by: Azat Khuzhin --- src/Interpreters/executeQuery.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index d1596c08318..3c03bea3dd1 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -158,11 +158,11 @@ static String prepareQueryForLogging(const String & query, ContextPtr context) /// Log query into text log (not into system table). -static void logQuery(const String & query, ContextPtr context, bool internal) +static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage) { if (internal) { - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(internal) {}", joinLines(query)); + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(internal) {} (stage: {})", joinLines(query), QueryProcessingStage::toString(stage)); } else { @@ -185,13 +185,14 @@ static void logQuery(const String & query, ContextPtr context, bool internal) if (auto txn = context->getCurrentTransaction()) transaction_info = fmt::format(" (TID: {}, TIDH: {})", txn->tid, txn->tid.getHash()); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){}{} {}", + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){}{} {} (stage: {})", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? 
", initial_query_id: " + initial_query_id : std::string()), transaction_info, comment, - joinLines(query)); + joinLines(query), + QueryProcessingStage::toString(stage)); if (client_info.client_trace_context.trace_id != UUID()) { @@ -498,7 +499,7 @@ static std::tuple executeQueryImpl( String query = String(begin, begin + std::min(end - begin, static_cast(max_query_size))); auto query_for_logging = prepareQueryForLogging(query, context); - logQuery(query_for_logging, context, internal); + logQuery(query_for_logging, context, internal, stage); if (!internal) { @@ -548,7 +549,7 @@ static std::tuple executeQueryImpl( /// since it substitute parameters and without them query does not contain /// parameters), to keep query as-is in query_log and server log. query_for_logging = prepareQueryForLogging(query, context); - logQuery(query_for_logging, context, internal); + logQuery(query_for_logging, context, internal, stage); /// Propagate WITH statement to children ASTSelect. if (settings.enable_global_with_statement) From 29a8a00656b63a1b5079973592638b6e519cbe8b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 17 May 2022 14:48:06 +0300 Subject: [PATCH 254/615] Add ability to pass QueryKind via clickhouse-client/local (useful for debugging) v2: fix LocalConnection::sendQuery() for Suggest (comes w/o client_info) [1] [1]: https://s3.amazonaws.com/clickhouse-test-reports/37290/7c85175963226ff78eec542efafcff4e650aa0f0/stateless_tests__ubsan__actions_.html Signed-off-by: Azat Khuzhin --- .../completions/clickhouse-bootstrap | 10 +++++ programs/client/Client.cpp | 1 + programs/local/LocalServer.cpp | 1 + src/Client/ClientBase.cpp | 13 ++++++ src/Client/ClientBase.h | 1 + src/Client/LocalConnection.cpp | 8 +++- .../0_stateless/02303_query_kind.reference | 44 +++++++++++++++++++ tests/queries/0_stateless/02303_query_kind.sh | 16 +++++++ 8 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02303_query_kind.reference create mode 100755 
tests/queries/0_stateless/02303_query_kind.sh diff --git a/programs/bash-completion/completions/clickhouse-bootstrap b/programs/bash-completion/completions/clickhouse-bootstrap index 98fcd68db16..8684f122503 100644 --- a/programs/bash-completion/completions/clickhouse-bootstrap +++ b/programs/bash-completion/completions/clickhouse-bootstrap @@ -34,6 +34,12 @@ CLICKHOUSE_QueryProcessingStage=( with_mergeable_state_after_aggregation_and_limit ) +CLICKHOUSE_QueryKind=( + initial_query + secondary_query + no_query +) + CLICKHOUSE_Format=( CapnProto PostgreSQLWire @@ -124,6 +130,10 @@ function _complete_for_clickhouse_generic_bin_impl() COMPREPLY=( $(compgen -W "${CLICKHOUSE_QueryProcessingStage[*]}" -- "$cur") ) return 1 ;; + --query_kind) + COMPREPLY=( $(compgen -W "${CLICKHOUSE_QueryKind[*]}" -- "$cur") ) + return 1 + ;; --send_logs_level) COMPREPLY=( $(compgen -W "${CLICKHOUSE_logs_level[*]}" -- "$cur") ) return 1 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4e4e0cc07f5..cbbf195a68c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1038,6 +1038,7 @@ void Client::processConfig() ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); client_info.quota_key = config().getString("quota_key", ""); + client_info.query_kind = query_kind; } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index f3fa7ff2bfa..381f8b23db9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -626,6 +626,7 @@ void LocalServer::processConfig() ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); + client_info.query_kind = query_kind; } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 35ef55a1387..9cc31df0b43 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -119,6 +119,17 @@ namespace ProfileEvents namespace DB { +static ClientInfo::QueryKind parseQueryKind(const 
String & query_kind) +{ + if (query_kind == "initial_query") + return ClientInfo::QueryKind::INITIAL_QUERY; + if (query_kind == "secondary_query") + return ClientInfo::QueryKind::SECONDARY_QUERY; + if (query_kind == "no_query") + return ClientInfo::QueryKind::NO_QUERY; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown query kind {}", query_kind); +} + static void incrementProfileEventsBlock(Block & dst, const Block & src) { if (!dst) @@ -2125,6 +2136,7 @@ void ClientBase::init(int argc, char ** argv) ("query,q", po::value(), "query") ("stage", po::value()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit") + ("query_kind", po::value()->default_value("initial_query"), "One of initial_query/secondary_query/no_query") ("query_id", po::value(), "query_id") ("progress", "print progress of queries execution") @@ -2255,6 +2267,7 @@ void ClientBase::init(int argc, char ** argv) server_logs_file = options["server_logs_file"].as(); query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); profile_events.delay_ms = options["profile-events-delay-ms"].as(); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index d373ce5f60b..d11977e984a 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -256,6 +256,7 @@ protected: } profile_events; QueryProcessingStage::Enum query_processing_stage; + ClientInfo::QueryKind query_kind; bool fake_drop = false; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 77519423763..0707b0bcdc0 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -73,11 +73,15 @@ void LocalConnection::sendQuery( const String & query_id, UInt64 stage, const Settings *, - const 
ClientInfo *, + const ClientInfo * client_info, bool, std::function process_progress_callback) { - query_context = session.makeQueryContext(); + /// Suggestion comes without client_info. + if (client_info) + query_context = session.makeQueryContext(*client_info); + else + query_context = session.makeQueryContext(); query_context->setCurrentQueryId(query_id); if (send_progress) { diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference new file mode 100644 index 00000000000..51addfdb857 --- /dev/null +++ b/tests/queries/0_stateless/02303_query_kind.reference @@ -0,0 +1,44 @@ +clickhouse-client --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: toString(dummy) String + Expression (Before GROUP BY) + Header: toString(dummy) String + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +clickhouse-local --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: toString(dummy) String + Expression (Before GROUP BY) + Header: toString(dummy) String + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +clickhouse-client --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: dummy UInt8 + Expression (Before GROUP BY) + Header: dummy UInt8 + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy 
UInt8 +clickhouse-local --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: dummy UInt8 + Expression (Before GROUP BY) + Header: dummy UInt8 + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02303_query_kind.sh b/tests/queries/0_stateless/02303_query_kind.sh new file mode 100755 index 00000000000..5ad5f9ec6f4 --- /dev/null +++ b/tests/queries/0_stateless/02303_query_kind.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function run_query() +{ + echo "clickhouse-client $*" + $CLICKHOUSE_CLIENT "$@" + + echo "clickhouse-local $*" + $CLICKHOUSE_LOCAL "$@" +} +run_query --query_kind secondary_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" +run_query --query_kind initial_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" From e8dd946f3311c952d087f90a22017553869a704a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Thu, 19 May 2022 01:14:27 -0400 Subject: [PATCH 255/615] Update .gitmodules newline at end of file added --- .gitmodules | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 5988282f616..8b30973951f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -270,4 +270,5 @@ url = https://github.com/eigen-mirror/eigen [submodule "contrib/hashidsxx"] path = contrib/hashidsxx - url = https://github.com/schoentoon/hashidsxx.git \ No newline at end of file + url = https://github.com/schoentoon/hashidsxx.git + From 9411406a9ce0023a194364ba2880065896a704d6 Mon Sep 17 00:00:00 
2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 19 May 2022 09:25:59 +0200 Subject: [PATCH 256/615] Update test --- tests/queries/0_stateless/02305_schema_inference_with_globs.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh index 346931e7204..19506c84645 100755 --- a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 1ad19051e55fe328b833858b32e30470ccb325f4 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 19 May 2022 11:01:05 +0300 Subject: [PATCH 257/615] Fixed error with symbols in key name in S3. --- src/IO/S3/PocoHTTPClient.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 25b03d66097..b9b20106465 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -215,8 +215,16 @@ void PocoHTTPClient::makeRequestInternal( * To overcome this limitation, we encode URL with "Aws::Http::URI" and then pass already prepared URL to Poco. */ - Aws::Http::URI aws_target_uri(uri); - poco_request.setURI(aws_target_uri.GetPath() + aws_target_uri.GetQueryString()); + std::string path_and_query; + const std::string & query = target_uri.getRawQuery(); + const std::string reserved = "?#:;+@&="; /// Poco::URI::RESERVED_QUERY_PARAM without '/'. + Poco::URI::encode(target_uri.getPath(), reserved, path_and_query); + if (!query.empty()) + { + path_and_query += '?'; + path_and_query += query; + } + poco_request.setURI(path_and_query); switch (request.GetMethod()) { From 3d0e4c56e25df15732bef16c7b49fc168a159700 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 10:05:34 +0200 Subject: [PATCH 258/615] Fix prefetch release branch --- tests/ci/release.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/release.py b/tests/ci/release.py index 6f3c58d2bd5..b07deffa1fb 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -165,7 +165,10 @@ class Release: ) # Prefetch the branch to have it updated - self.run(f"git fetch {self.repo.url} {branch}:{branch}") + if self._git.branch == branch: + self.run("git pull") + else: + self.run(f"git fetch {self.repo.url} {branch}:{branch}") output = self.run(f"git branch --contains={self.release_commit} {branch}") if branch not in output: raise Exception( From d0fe794fe5793061bc06a570edba784b7707b09b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 19 May 2022 10:06:09 +0200 Subject: [PATCH 259/615] Update version to 22.6.1.1 --- cmake/autogenerated_versions.txt | 10 +++--- .../StorageSystemContributors.generated.cpp | 34 +++++++++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index caf6f217f6a..210c927b2fd 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54462) +SET(VERSION_REVISION 54463) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 77a82cc090dd5dba2d995946e82a12a2cadaaff3) -SET(VERSION_DESCRIBE v22.5.1.1-testing) -SET(VERSION_STRING 22.5.1.1) +SET(VERSION_GITHASH df0cb0620985eb5ec59760cc76f7736e5b6209bb) +SET(VERSION_DESCRIBE v22.6.1.1-testing) +SET(VERSION_STRING 22.6.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 51923397ede..42a0f24cc65 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -19,6 +19,7 @@ const char * auto_contributors[] { "Albert Kidrachev", "Alberto", "Aleksandr Karo", + "Aleksandr Razumov", "Aleksandr Shalimov", "Aleksandra (Ася)", "Aleksandrov Vladimir", @@ -179,6 +180,7 @@ const char * auto_contributors[] { "Boris Granveaud", "Boris Kuschel", "Bowen Masco", + "Brandon", "Braulio Valdivielso", "Brendan Cox", "Brett Hoerner", @@ -205,6 +207,7 @@ const char * auto_contributors[] { "CurtizJ", "DF5HSE", "DIAOZHAFENG", + "Dale McDiarmid", "Dan Roscigno", "Daniel Bershatsky", "Daniel Dao", @@ -392,6 +395,7 @@ const char * auto_contributors[] { "João Figueiredo", "Julian Gilyadov", "Julian Zhou", + "Julio Jimenez", "Justin Hilliard", "Kang Liu", "Karl Pietrzak", @@ -425,6 +429,7 @@ const char * auto_contributors[] { "LAL2211", "LB", "LIJINGBO", + "Ladislav Snizek", "Larry Luo", "Lars Eidnes", "Latysheva Alexandra", @@ -451,6 +456,7 @@ const char * auto_contributors[] { "Maksim Kita", "Malte", "Marat IDRISOV", + "Marcelo Rodriguez", "Marek Vavrusa", "Marek Vavruša", "Marek Vavruša", @@ -510,6 +516,7 @@ const char * auto_contributors[] { "Mike Kot", "Mikhail", "Mikhail Andreev", + "Mikhail Artemenko", "Mikhail Cheshkov", "Mikhail Fandyushin", "Mikhail Filimonov", @@ -615,6 +622,7 @@ const char * 
auto_contributors[] { "Philippe Ombredanne", "Potya", "Pradeep Chhetri", + "Prashant Shahi", "Pxl", "Pysaoke", "Quid37", @@ -652,6 +660,7 @@ const char * auto_contributors[] { "Russ Frank", "Ruzal Ibragimov", "Ryad ZENINE", + "Ryadh DAHIMENE", "S.M.A. Djawadi", "Saad Ur Rahman", "Sabyanin Maxim", @@ -661,6 +670,7 @@ const char * auto_contributors[] { "Samuel Chou", "Saulius Valatka", "Sean Haynes", + "Sean Lafferty", "Serg Kulakov", "Serge Rider", "Sergei Bocharov", @@ -677,6 +687,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Tulentsev", "Sergey V. Galtsev", "Sergey Zaikin", "Sergi Almacellas Abellana", @@ -727,6 +738,7 @@ const char * auto_contributors[] { "The-Alchemist", "Thom O'Connor", "Thomas Berdy", + "Tian Xinhui", "Tiaonmmn", "Tigran Khudaverdyan", "Timur Magomedov", @@ -804,11 +816,13 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "XenoAmess", "Xianda Ke", "Xiang Zhou", "Xin Wang", "Xudong Zhang", "Y Lu", + "Yakko Majuri", "Yakov Olkhovskiy", "Yangkuan Liu", "Yatian Xu", @@ -821,6 +835,7 @@ const char * auto_contributors[] { "Yiğit Konur", "Yohann Jardin", "Yong Wang", + "Yong-Hao Zou", "Youenn Lebras", "Yuntao Wu", "Yuri Dyachenko", @@ -884,6 +899,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bkuschel", "blazerer", "bluebirddm", "bo zeng", @@ -936,6 +952,7 @@ const char * auto_contributors[] { "dmi-feo", "dmitrii", "dmitriiut", + "dmitriy", "dmitry kuzmin", "dongyifeng", "eaxdev", @@ -986,9 +1003,13 @@ const char * auto_contributors[] { "grantovsky", "gulige", "guoleiyi", + "guomaolin", + "guov100", + "guykohen", "gyuton", "hanqf-git", "hao.he", + "hardstep33", "hchen9", "hcz", "heleihelei", @@ -997,6 +1018,7 @@ const char * auto_contributors[] { "hermano", "hexiaoting", "hhell", + "homeward", "hotid", "huangzhaowei", "hustnn", @@ -1025,6 +1047,7 @@ const char * auto_contributors[] { "jennyma", "jetgm", "jewisliu", + 
"jiahui-97", "jianmei zhang", "jkuklis", "jus1096", @@ -1045,6 +1068,7 @@ const char * auto_contributors[] { "l", "l1tsolaiki", "lalex", + "lanfz", "larryluogit", "laurieliyang", "lehasm", @@ -1054,6 +1078,7 @@ const char * auto_contributors[] { "levushkin aleksej", "levysh", "lgbo", + "lgbo-usstc", "lgbo-ustc", "lhuang0928", "lhuang09287750", @@ -1066,6 +1091,7 @@ const char * auto_contributors[] { "listar", "litao91", "liu-bov", + "liumaojing", "liuneng1994", "liuyangkuan", "liuyimin", @@ -1120,8 +1146,10 @@ const char * auto_contributors[] { "nagorny", "nauta", "nautaa", + "ndchikin", "neng.liu", "never lee", + "ni1l", "nicelulu", "nickzhwang", "nikitamikhaylov", @@ -1134,6 +1162,7 @@ const char * auto_contributors[] { "ogorbacheva", "olegkv", "olevino", + "olevino999", "olgarev", "orantius", "p0ny", @@ -1205,6 +1234,7 @@ const char * auto_contributors[] { "tangjiangling", "tao jiang", "tavplubix", + "tchepavel", "tcoyvwac", "tekeri", "templarzq", @@ -1237,10 +1267,12 @@ const char * auto_contributors[] { "vzakaznikov", "wangchao", "weeds085490", + "wuxiaobai24", "wzl", "xPoSx", "xiedeyantu", "xinhuitian", + "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", @@ -1276,6 +1308,7 @@ const char * auto_contributors[] { "zhukai", "zkun", "zlx19950903", + "zombee0", "zvonand", "zvrr", "zvvr", @@ -1296,6 +1329,7 @@ const char * auto_contributors[] { "何李夫", "凌涛", "吴健", + "小蝌蚪", "小路", "张中南", "张健", From 17afe42ad32d33b92e9339ee20617688a656722c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 19 May 2022 10:39:15 +0200 Subject: [PATCH 260/615] Disable clang-tidy readability-identifier-length readability-identifier-length was added with Clang 14 which does not yet run in the central builds yet but (at least on my system) locally. Disabling the check because it is too noisy. Older clang-tidy versions will just ignore the setting. 
--- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index 706730c464d..70ce24aa731 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -95,6 +95,7 @@ Checks: '*, -readability-else-after-return, -readability-function-cognitive-complexity, -readability-function-size, + -readability-identifier-length, -readability-implicit-bool-conversion, -readability-isolate-declaration, -readability-magic-numbers, From 3a73ef6cd8ff19e97955a8f9f926110772048e73 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 19 May 2022 10:43:52 +0200 Subject: [PATCH 261/615] Fix wrong `id` argument to get GID --- .github/workflows/tags_stable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index 9c55c619039..0e0eefb4a35 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -32,7 +32,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv - GID=$(id -d "${UID}") + GID=$(id -g "${UID}") docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \ --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \ /ClickHouse/utils/changelog/changelog.py -vv --gh-user-or-token="$GITHUB_TOKEN" \ From c3fd892e260a8d93fc817d16a10393628529a119 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 10:47:07 +0200 Subject: [PATCH 262/615] Update version_date.tsv and changelogs after v22.5.1.2079-stable --- docs/changelogs/v22.5.1.2079-stable.md | 182 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 183 insertions(+) create mode 100644 docs/changelogs/v22.5.1.2079-stable.md diff --git a/docs/changelogs/v22.5.1.2079-stable.md b/docs/changelogs/v22.5.1.2079-stable.md new file mode 100644 index 00000000000..aab8266c115 --- /dev/null +++ b/docs/changelogs/v22.5.1.2079-stable.md @@ -0,0 +1,182 @@ +### ClickHouse release v22.5.1.2079-stable FIXME as compared to v22.4.1.2305-prestable + +#### Backward Incompatible Change +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). +* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### New Feature +* add implementation of MeiliSearch storage and table function. [#33332](https://github.com/ClickHouse/ClickHouse/pull/33332) ([Mikhail Artemenko](https://github.com/Michicosun)). +* Add support of GROUPING SETS in GROUP BY clause. Follow up after [#33186](https://github.com/ClickHouse/ClickHouse/issues/33186). This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)). +* According to the design mentioned at :[#19627](https://github.com/ClickHouse/ClickHouse/issues/19627)#issuecomment-1068772646. [#35318](https://github.com/ClickHouse/ClickHouse/pull/35318) ([徐炘](https://github.com/weeds085490)). 
+* Added `SYSTEM SYNC DATABASE REPLICA` query which allows to sync tables metadata inside Replicated database, because currently synchronisation is asynchronous. [#35944](https://github.com/ClickHouse/ClickHouse/pull/35944) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Add output format Prometheus, [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). +* Parse collations in CREATE TABLE, throw exception or ignore. closes [#35892](https://github.com/ClickHouse/ClickHouse/issues/35892). [#36271](https://github.com/ClickHouse/ClickHouse/pull/36271) ([yuuch](https://github.com/yuuch)). +* Add aliases JSONLines and NDJSON for JSONEachRow. Closes [#36303](https://github.com/ClickHouse/ClickHouse/issues/36303). [#36327](https://github.com/ClickHouse/ClickHouse/pull/36327) ([flynn](https://github.com/ucasfl)). +* Set parts_to_delay_insert and parts_to_throw_insert as query-level settings. If they are defined, they can override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)). +* temporary table can show total rows and total bytes. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)). +* Added new hash function - wyHash64. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)). +* Window function nth_value was added. [#36601](https://github.com/ClickHouse/ClickHouse/pull/36601) ([Nikolay](https://github.com/ndchikin)). +* Add MySQLDump input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)). 
+* New single binary based diagnostics tool. [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). +* **Description:** It is used to count the system table of a request for remote file access, which can help users analyze the causes of performance fluctuations in the scenario of separation of storage and compute. The current system table structure is as follows. When a query reads a segment of a remote file, a record is generated. Read types include **READ_FROM_FS_AND_DOWNLOADED_TO_CACHE、READ_FROM_CACHE、READ_FROM_FS_BYPASSING_CACHE**, which are used to indicate whether the query accesses the segment from the cache or from a remote file. [#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)). +* Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)). +* Related issue - [#35101](https://github.com/ClickHouse/ClickHouse/issues/35101). [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). +* Added system.certificates table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Performance Improvement +* Improve performance of ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). +* First commit is to increase the inline threshold. Next commits will improve queries by inlining for those who have shown better performance. This way we will not increase the compile time and binary size and optimize the program. [#34544](https://github.com/ClickHouse/ClickHouse/pull/34544) ([Daniel Kutenin](https://github.com/danlark1)). +* Transform OR LIKE chain to multiMatchAny.
Will enable once we have more confidence it works. [#34932](https://github.com/ClickHouse/ClickHouse/pull/34932) ([Daniel Kutenin](https://github.com/danlark1)). +* Rewrite 'select countDistinct(a) from t' to 'select count(1) from (select a from t groupBy a)'. [#35993](https://github.com/ClickHouse/ClickHouse/pull/35993) ([zhanglistar](https://github.com/zhanglistar)). +* Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The default `HashJoin` is not thread safe for inserting right table's rows and run it in a single thread. When the right table is large, the join process is too slow with low cpu utilization. [#36415](https://github.com/ClickHouse/ClickHouse/pull/36415) ([lgbo](https://github.com/lgbo-ustc)). +* Improve performance of reading from storage `File` and table functions `file` in case when path has globs and matched directory contains large number of files. [#36647](https://github.com/ClickHouse/ClickHouse/pull/36647) ([Anton Popov](https://github.com/CurtizJ)). +* Apply parallel parsing for input format `HiveText`, which can speed up HiveText parsing by 2x when reading local file. [#36650](https://github.com/ClickHouse/ClickHouse/pull/36650) ([李扬](https://github.com/taiyang-li)). +* Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)). +* This PR improves the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation(ctrl-c) faster, by checking `isCancelled()` more frequently.
[#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)). +* Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Improvement +* Remind properly if use clickhouse-client --file without preceding --external. Close [#34747](https://github.com/ClickHouse/ClickHouse/issues/34747). [#34765](https://github.com/ClickHouse/ClickHouse/pull/34765) ([李扬](https://github.com/taiyang-li)). +* Added support for specifying `content_type` in predefined and static HTTP handler config. [#34916](https://github.com/ClickHouse/ClickHouse/pull/34916) ([Roman Nikonov](https://github.com/nic11)). +* Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). +* Nullables detection in protobuf using Google wrappers. [#35149](https://github.com/ClickHouse/ClickHouse/pull/35149) ([Jakub Kuklis](https://github.com/jkuklis)). +* If the required amount of memory is available before the selected query stopped, all waiting queries continue execution. Now we don't stop any query if memory is freed before the moment when the selected query knows about the cancellation. [#35637](https://github.com/ClickHouse/ClickHouse/pull/35637) ([Dmitry Novik](https://github.com/novikd)). +* Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)). +* - Add branch to avoid unnecessary memcpy in readbig.
[#36095](https://github.com/ClickHouse/ClickHouse/pull/36095) ([jasperzhu](https://github.com/jinjunzh)). +* Refactor code around schema inference with globs. Try next file from glob only if it makes sense (previously we tried next file in case of any error). Also it fixes [#36317](https://github.com/ClickHouse/ClickHouse/issues/36317). [#36205](https://github.com/ClickHouse/ClickHouse/pull/36205) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve schema inference for json objects. [#36207](https://github.com/ClickHouse/ClickHouse/pull/36207) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for force recovery which allows you to reconfigure cluster without quorum. [#36258](https://github.com/ClickHouse/ClickHouse/pull/36258) ([Antonio Andelic](https://github.com/antonio2368)). +* We create a local interpreter if we want to execute query on localhost replica. But for when executing query on multiple replicas we rely on the fact that a connection exists so replicas can talk to coordinator. It is now improved and localhost replica can talk to coordinator directly in the same process. [#36281](https://github.com/ClickHouse/ClickHouse/pull/36281) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). +* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* clickhouse-benchmark can read auth from environment variables. [#36497](https://github.com/ClickHouse/ClickHouse/pull/36497) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Allow names of tuple elements that start from digits. 
[#36544](https://github.com/ClickHouse/ClickHouse/pull/36544) ([Anton Popov](https://github.com/CurtizJ)). +* Allow file descriptors in table function file if it is run in clickhouse-local. [#36562](https://github.com/ClickHouse/ClickHouse/pull/36562) ([wuxiaobai24](https://github.com/wuxiaobai24)). +* Allow to cast columns of type `Object(...)` to `Object(Nullable(...))`. [#36564](https://github.com/ClickHouse/ClickHouse/pull/36564) ([awakeljw](https://github.com/awakeljw)). +* Cleanup CSS in Play UI. The pixels are more evenly placed. Better usability for long content in table cells. [#36569](https://github.com/ClickHouse/ClickHouse/pull/36569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The metrics about time spent reading from s3 now calculated correctly. Close [#35483](https://github.com/ClickHouse/ClickHouse/issues/35483). [#36572](https://github.com/ClickHouse/ClickHouse/pull/36572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve `SYSTEM DROP FILESYSTEM CACHE` query: `` option and `FORCE` option. [#36639](https://github.com/ClickHouse/ClickHouse/pull/36639) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `is_all_data_sent` column into `system.processes`, and improve internal testing hardening check based on it. [#36649](https://github.com/ClickHouse/ClickHouse/pull/36649) ([Azat Khuzhin](https://github.com/azat)). +* Now date time conversion functions that generates time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). 
+* Allow to cancel query while still keep decent query id in MySQLHandler. [#36699](https://github.com/ClickHouse/ClickHouse/pull/36699) ([Amos Bird](https://github.com/amosbird)). +* Properly cancel INSERT queries in `clickhouse-client`/`clickhouse-local`. [#36710](https://github.com/ClickHouse/ClickHouse/pull/36710) ([Azat Khuzhin](https://github.com/azat)). +* Allow cluster macro in s3Cluster table function. [#36726](https://github.com/ClickHouse/ClickHouse/pull/36726) ([Vadim Volodin](https://github.com/PolyProgrammist)). +* Added `user_defined_path` config setting. [#36753](https://github.com/ClickHouse/ClickHouse/pull/36753) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to execute hash functions with arguments of type `Array(Tuple(..))`. [#36812](https://github.com/ClickHouse/ClickHouse/pull/36812) ([Anton Popov](https://github.com/CurtizJ)). +* Add warning if someone running clickhouse-server with log level "test". The log level "test" was added recently and cannot be used in production due to inevitable, unavoidable, fatal and life-threatening performance degradation. [#36824](https://github.com/ClickHouse/ClickHouse/pull/36824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Play UI: If there is one row in result and more than a few columns, display the result vertically. Continuation of [#36811](https://github.com/ClickHouse/ClickHouse/issues/36811). [#36842](https://github.com/ClickHouse/ClickHouse/pull/36842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add extra diagnostic info (if applicable) when sending exception to other server. [#36872](https://github.com/ClickHouse/ClickHouse/pull/36872) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* After [#36425](https://github.com/ClickHouse/ClickHouse/issues/36425) settings like `background_fetches_pool_size` became obsolete and can appear in top level config, but clickhouse throws an exception like `Error updating configuration from '/etc/clickhouse-server/config.xml' config.: Code: 137. DB::Exception: A setting 'background_fetches_pool_size' appeared at top level in config /etc/clickhouse-server/config.xml.` This is fixed. [#36917](https://github.com/ClickHouse/ClickHouse/pull/36917) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Finalize write buffers in case of exception to avoid doing it in destructors. Hope it fixes: [#36907](https://github.com/ClickHouse/ClickHouse/issues/36907). [#36979](https://github.com/ClickHouse/ClickHouse/pull/36979) ([Kruglov Pavel](https://github.com/Avogar)). +* Play UI: Nullable numbers will be aligned to the right in table cells. This closes [#36982](https://github.com/ClickHouse/ClickHouse/issues/36982). [#36988](https://github.com/ClickHouse/ClickHouse/pull/36988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implemented a new mode of handling row policies which can be enabled in the main configuration which enables users without permissive row policies to read rows. [#36997](https://github.com/ClickHouse/ClickHouse/pull/36997) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug which can lead to forgotten outdated parts in MergeTree table engines family in case of filesystem failures during parts removal. Before fix they will be removed only after first server restart. [#37014](https://github.com/ClickHouse/ClickHouse/pull/37014) ([alesapin](https://github.com/alesapin)). +* Modify query div in play.html to be extendable beyond 200px height. In case of very long queries it is helpful to extend the textarea element, only today, since the div is fixed height, the extended textarea hides the data div underneath.
With this fix, extending the textarea element will push the data div down/up such the extended textarea won't hide it. [#37051](https://github.com/ClickHouse/ClickHouse/pull/37051) ([guyco87](https://github.com/guyco87)). +* Better read from cache. [#37054](https://github.com/ClickHouse/ClickHouse/pull/37054) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix progress indication for `INSERT SELECT` in clickhouse-local for any query and for file progress in client, more correct file progress. [#37075](https://github.com/ClickHouse/ClickHouse/pull/37075) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Option `compatibility_ignore_auto_increment_in_create_table` allows ignoring `AUTO_INCREMENT` keyword in a column declaration to simplify migration from MySQL. [#37178](https://github.com/ClickHouse/ClickHouse/pull/37178) ([Igor Nikonov](https://github.com/devcrafter)). +* Added implicit cast for `h3kRing` function second argument to improve usability. Closes [#35432](https://github.com/ClickHouse/ClickHouse/issues/35432). [#37189](https://github.com/ClickHouse/ClickHouse/pull/37189) ([Maksim Kita](https://github.com/kitaisreal)). +* Limit the max partitions could be queried for each hive table. Avoid resource overruns. [#37281](https://github.com/ClickHouse/ClickHouse/pull/37281) ([lgbo](https://github.com/lgbo-ustc)). + +#### Bug Fix +* Extracts Version ID if present from the URI and adds a request to the AWS HTTP URI. Closes [#31221](https://github.com/ClickHouse/ClickHouse/issues/31221). 
- [x] Extract `Version ID` from URI if present and reassemble without it. - [x] Configure `AWS HTTP URI` object with request. - [x] Unit Tests: [`gtest_s3_uri`](https://github.com/ClickHouse/ClickHouse/blob/2340a6c6849ebc05a8efbf97ba8de3ff9dc0eff4/src/IO/tests/gtest_s3_uri.cpp) - [x] Drop instrumentation commit. [#34571](https://github.com/ClickHouse/ClickHouse/pull/34571) ([Saad Ur Rahman](https://github.com/surahman)). + +#### Build/Testing/Packaging Improvement +* Now `clickhouse-keeper` for the `x86_64` architecture is statically linked with [musl](https://musl.libc.org/) and doesn't depend on any system libraries. [#31833](https://github.com/ClickHouse/ClickHouse/pull/31833) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail performance comparison on errors in the report. [#34797](https://github.com/ClickHouse/ClickHouse/pull/34797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Check out the most of build jobs with depth=1. [#36091](https://github.com/ClickHouse/ClickHouse/pull/36091) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Bump minizip-ng to a sane version, or else old git won't be able to address dangling remote ref. [#35656](https://github.com/ClickHouse/ClickHouse/issues/35656). [#36295](https://github.com/ClickHouse/ClickHouse/pull/36295) ([Amos Bird](https://github.com/amosbird)). +* Use consistent `force tests` label in CI. [#36496](https://github.com/ClickHouse/ClickHouse/pull/36496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Limit PowerPC code generation to Power8 for better compatibility. This closes [#36025](https://github.com/ClickHouse/ClickHouse/issues/36025). [#36529](https://github.com/ClickHouse/ClickHouse/pull/36529) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - More robust handling of unknown architectures in CMake. [#36614](https://github.com/ClickHouse/ClickHouse/pull/36614) ([Robert Schulze](https://github.com/rschu1ze)). +* Simplify performance test. 
This will give a chance for us to use it. [#36769](https://github.com/ClickHouse/ClickHouse/pull/36769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix checking for rabbitmq liveness in tests. Fixed incorrect import. [#36938](https://github.com/ClickHouse/ClickHouse/pull/36938) ([tchepavel](https://github.com/tchepavel)). +* ClickHouse builds for `PowerPC64LE` architecture are now available in universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Make cmake build scripts a bit more robust. [#37169](https://github.com/ClickHouse/ClickHouse/pull/37169) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* The ilike() function on FixedString columns could have returned wrong results (i.e. match less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)). +* Enable `enable_global_with_statement` for subqueries, close [#37141](https://github.com/ClickHouse/ClickHouse/issues/37141). [#37166](https://github.com/ClickHouse/ClickHouse/pull/37166) ([Vladimir C](https://github.com/vdimir)). +* Now WindowView `WATCH EVENTS` query will not be terminated due to the nonempty Chunk created in `WindowViewSource.h:58`. [#37182](https://github.com/ClickHouse/ClickHouse/pull/37182) ([vxider](https://github.com/Vxider)). +* Fix "Cannot create column of type Set" for distributed queries with LIMIT BY. [#37193](https://github.com/ClickHouse/ClickHouse/pull/37193) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix possible overflow during `OvercommitRatio` comparison. cc @tavplubix. [#37197](https://github.com/ClickHouse/ClickHouse/pull/37197) ([Dmitry Novik](https://github.com/novikd)). +* Update `max_fired_watermark` after blocks **actually** fired, in case delete data that hasn't been fired yet. [#37225](https://github.com/ClickHouse/ClickHouse/pull/37225) ([vxider](https://github.com/Vxider)). +* Kafka does not need `group.id` on producer stage. In console log you can find a warning that describes this issue: ``` 2022.05.15 17:59:13.270227 [ 137 ] {} StorageKafka (topic-name): [rdk:CONFWARN] [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance ```. [#37228](https://github.com/ClickHouse/ClickHouse/pull/37228) ([Mark Andreev](https://github.com/mrk-andreev)). +* Fix MySQL database engine to be compatible with binary(0) dataType. [#37232](https://github.com/ClickHouse/ClickHouse/pull/37232) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix execution of mutations in tables, in which there exist columns of type `Object`. Using subcolumns of type `Object` in `WHERE` expression of `UPDATE` or `DELETE` queries is not allowed yet, as well as manipulating (`DROP`, `MODIFY`) of separate subcolumns. Fixes [#37205](https://github.com/ClickHouse/ClickHouse/issues/37205). [#37266](https://github.com/ClickHouse/ClickHouse/pull/37266) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Nullable(String) to Nullable(Bool/IPv4/IPv6) conversion. Closes [#37221](https://github.com/ClickHouse/ClickHouse/issues/37221). [#37270](https://github.com/ClickHouse/ClickHouse/pull/37270) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix system.opentelemetry_span_log attribute.values alias to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)). +* Fix possible deadlock in OvercommitTracker during logging.
cc @alesapin @tavplubix Fixes [#37272](https://github.com/ClickHouse/ClickHouse/issues/37272). [#37299](https://github.com/ClickHouse/ClickHouse/pull/37299) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* - fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). +* Accidentally ZSTD support for Arrow was not being built. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). [#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)). +* Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). +* Fix insertion of complex JSONs with nested arrays to columns of type `Object`. [#36077](https://github.com/ClickHouse/ClickHouse/pull/36077) ([Anton Popov](https://github.com/CurtizJ)). +* Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* - Fix assertion in JOIN, close [#36199](https://github.com/ClickHouse/ClickHouse/issues/36199). [#36201](https://github.com/ClickHouse/ClickHouse/pull/36201) ([Vladimir C](https://github.com/vdimir)). +* Fix dictionary reload for `ClickHouseDictionarySource` if it contains scalar subqueries. [#36390](https://github.com/ClickHouse/ClickHouse/pull/36390) ([lthaooo](https://github.com/lthaooo)). +* Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . 
This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in s3Cluster schema inference that let to the fact that not all data was read in the select from s3Cluster. The bug appeared in https://github.com/ClickHouse/ClickHouse/pull/35544. [#36434](https://github.com/ClickHouse/ClickHouse/pull/36434) ([Kruglov Pavel](https://github.com/Avogar)). +* Server might fail to start if it cannot resolve hostname of external ClickHouse dictionary. It's fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([Alexander Tokmakov](https://github.com/tavplubix)). +* This code segment can prove bug. ``` int main() { RangeGenerator g{1230, 100}; std::cout << g.totalRanges() << std::endl; int count = 0; while(g.nextRange()) ++count; std::cout << "count:" << count << std::endl; return 0; }. [#36469](https://github.com/ClickHouse/ClickHouse/pull/36469) ([李扬](https://github.com/taiyang-li)). +* Fix clickhouse-benchmark json report results. [#36473](https://github.com/ClickHouse/ClickHouse/pull/36473) ([Tian Xinhui](https://github.com/xinhuitian)). +* Add missing enum values in system.session_log table. Closes [#36474](https://github.com/ClickHouse/ClickHouse/issues/36474). [#36480](https://github.com/ClickHouse/ClickHouse/pull/36480) ([Memo](https://github.com/Joeywzr)). +* Fix possible exception with unknown packet from server in client. [#36481](https://github.com/ClickHouse/ClickHouse/pull/36481) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix usage of executable user defined functions in GROUP BY. Before executable user defined functions cannot be used as expressions in GROUP BY. Closes [#36448](https://github.com/ClickHouse/ClickHouse/issues/36448). [#36486](https://github.com/ClickHouse/ClickHouse/pull/36486) ([Maksim Kita](https://github.com/kitaisreal)). 
+* close [#33906](https://github.com/ClickHouse/ClickHouse/issues/33906). [#36489](https://github.com/ClickHouse/ClickHouse/pull/36489) ([awakeljw](https://github.com/awakeljw)). +* Fix hostname sanity checks for Keeper cluster configuration. Add `keeper_server.host_checks_enabled` config to enable/disable those checks. [#36492](https://github.com/ClickHouse/ClickHouse/pull/36492) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix offset update ReadBufferFromEncryptedFile, which could cause undefined behaviour. [#36493](https://github.com/ClickHouse/ClickHouse/pull/36493) ([Kseniia Sumarokova](https://github.com/kssenii)). +* - Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). +* Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix format crash when default expression follow EPHEMERAL not literal. Closes [#36618](https://github.com/ClickHouse/ClickHouse/issues/36618). [#36633](https://github.com/ClickHouse/ClickHouse/pull/36633) ([flynn](https://github.com/ucasfl)). +* Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed parsing of query settings in `CREATE` query when engine is not specified. Fixes https://github.com/ClickHouse/ClickHouse/pull/34187#issuecomment-1103812419. [#36642](https://github.com/ClickHouse/ClickHouse/pull/36642) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible heap-use-after-free in schema inference. Closes [#36661](https://github.com/ClickHouse/ClickHouse/issues/36661). 
[#36679](https://github.com/ClickHouse/ClickHouse/pull/36679) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix server restart if cache configuration changed. [#36685](https://github.com/ClickHouse/ClickHouse/pull/36685) ([Kseniia Sumarokova](https://github.com/kssenii)). +* In the previous [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that testing **(stateless tests, flaky check (address, actions))** is timeout. Moreover, testing locally can also trigger unstable system deadlocks. This problem still exists when using the latest source code of master. [#36697](https://github.com/ClickHouse/ClickHouse/pull/36697) ([Han Shukai](https://github.com/KinderRiven)). +* Fix server reload on port change (do not wait for current connections from query context). [#36700](https://github.com/ClickHouse/ClickHouse/pull/36700) ([Azat Khuzhin](https://github.com/azat)). +* Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). +* During the [test](https://s3.amazonaws.com/clickhouse-test-reports/36376/1cb1c7275cb53769ab826772db9b71361bb3e413/stress_test__thread__actions_/clickhouse-server.clean.log) in [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that the one cache class was initialized twice, it throws a exception. Although the cause of this problem is not clear, there should be code logic of repeatedly loading disk in ClickHouse, so we need to make special judgment for this situation. [#36737](https://github.com/ClickHouse/ClickHouse/pull/36737) ([Han Shukai](https://github.com/KinderRiven)). +* Fix a bug of `groupBitmapAndState`/`groupBitmapOrState`/`groupBitmapXorState` on distributed table. [#36739](https://github.com/ClickHouse/ClickHouse/pull/36739) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Fix timeouts in Hedged requests. 
Connection hang right after sending remote query could lead to eternal waiting. [#36749](https://github.com/ClickHouse/ClickHouse/pull/36749) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix insertion to columns of type `Object` from multiple files, e.g. via table function `file` with globs. [#36762](https://github.com/ClickHouse/ClickHouse/pull/36762) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some issues with async reads from remote filesystem which happened when reading low cardinality. [#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix creation of tables with `flatten_nested = 0`. Previously unflattened `Nested` columns could be flattened after server restart. [#36803](https://github.com/ClickHouse/ClickHouse/pull/36803) ([Anton Popov](https://github.com/CurtizJ)). +* Fix incorrect cast in cached buffer from remote fs. [#36809](https://github.com/ClickHouse/ClickHouse/pull/36809) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fire in window view with hop window [#34044](https://github.com/ClickHouse/ClickHouse/issues/34044). [#36861](https://github.com/ClickHouse/ClickHouse/pull/36861) ([vxider](https://github.com/Vxider)). +* Fix `current_size` count in cache. [#36887](https://github.com/ClickHouse/ClickHouse/pull/36887) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix incorrect query result when doing constant aggregation. This fixes [#36728](https://github.com/ClickHouse/ClickHouse/issues/36728) . [#36888](https://github.com/ClickHouse/ClickHouse/pull/36888) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. 
[#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). +* Fix bugs when using multiple columns in WindowView by adding converting actions to make it possible to call`writeIntoWindowView` with a slightly different schema. [#36928](https://github.com/ClickHouse/ClickHouse/pull/36928) ([vxider](https://github.com/Vxider)). +* Fix issue: [#36671](https://github.com/ClickHouse/ClickHouse/issues/36671). [#36929](https://github.com/ClickHouse/ClickHouse/pull/36929) ([李扬](https://github.com/taiyang-li)). +* Fix stuck when dropping source table in WindowView. Closes [#35678](https://github.com/ClickHouse/ClickHouse/issues/35678). [#36967](https://github.com/ClickHouse/ClickHouse/pull/36967) ([vxider](https://github.com/Vxider)). +* Fixed logical error on `TRUNCATE` query in `Replicated` database. Fixes [#33747](https://github.com/ClickHouse/ClickHouse/issues/33747). [#36976](https://github.com/ClickHouse/ClickHouse/pull/36976) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix sending external tables data in HedgedConnections with max_parallel_replicas != 1. [#36981](https://github.com/ClickHouse/ClickHouse/pull/36981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed problem with infs in `quantileTDigest`. Fixes [#32107](https://github.com/ClickHouse/ClickHouse/issues/32107). [#37021](https://github.com/ClickHouse/ClickHouse/pull/37021) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix LowCardinality->ArrowDictionary invalid output when type of indexes is not UInt8. Closes [#36832](https://github.com/ClickHouse/ClickHouse/issues/36832). [#37043](https://github.com/ClickHouse/ClickHouse/pull/37043) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix in-order `GROUP BY` (`optimize_aggregation_in_order=1`) with `*Array` (`groupArrayArray`/...) aggregate functions. [#37046](https://github.com/ClickHouse/ClickHouse/pull/37046) ([Azat Khuzhin](https://github.com/azat)). 
+* Fixed performance degradation of some INSERT SELECT queries with implicit aggregation. Fixes [#36792](https://github.com/ClickHouse/ClickHouse/issues/36792). [#37047](https://github.com/ClickHouse/ClickHouse/pull/37047) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimize_aggregation_in_order with prefix GROUP BY and *Array aggregate functions. [#37050](https://github.com/ClickHouse/ClickHouse/pull/37050) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Minor refactor to prefer C++ Standard Algorithms"'. [#36511](https://github.com/ClickHouse/ClickHouse/pull/36511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Strict taskstats parser"'. [#36591](https://github.com/ClickHouse/ClickHouse/pull/36591) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Translate docs/zh/sql-reference/data-types/map.md"'. [#36594](https://github.com/ClickHouse/ClickHouse/pull/36594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Update setting.md"'. [#36595](https://github.com/ClickHouse/ClickHouse/pull/36595) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Documentation: Add a missing **ESTIMATE** in explain syntax'. [#36717](https://github.com/ClickHouse/ClickHouse/pull/36717) ([小蝌蚪](https://github.com/kayhaw)). +* NO CL ENTRY: '[Snyk] Security upgrade numpy from 1.16.6 to 1.22.2'. [#36729](https://github.com/ClickHouse/ClickHouse/pull/36729) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Translate playground.md to Chinese'. [#36821](https://github.com/ClickHouse/ClickHouse/pull/36821) ([小蝌蚪](https://github.com/kayhaw)). +* NO CL ENTRY: 'Revert "Memory overcommit: continue query execution if memory is available"'. [#36858](https://github.com/ClickHouse/ClickHouse/pull/36858) ([alesapin](https://github.com/alesapin)). 
+* NO CL ENTRY: 'Revert "Revert "Memory overcommit: continue query execution if memory is available""'. [#36859](https://github.com/ClickHouse/ClickHouse/pull/36859) ([Dmitry Novik](https://github.com/novikd)). +* NO CL ENTRY: 'Revert "BLAKE3 hash function documentation"'. [#37092](https://github.com/ClickHouse/ClickHouse/pull/37092) ([Rich Raposa](https://github.com/rfraposa)). +* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool."'. [#37261](https://github.com/ClickHouse/ClickHouse/pull/37261) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0f8d13a7d93..1d8bae44904 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v22.5.1.2079-stable 2022-05-19 v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 From e32b69577526c93e19ef86496943928893389a47 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 19 May 2022 08:00:38 +0300 Subject: [PATCH 263/615] Fix projections with GROUP/ORDER BY in query and optimize_aggregation_in_order With projections, GROUP BY/ORDER BY in query, optimize_aggregation_in_order, GROUP BY's InputOrderInfo was used incorrectly for ORDER BY. 
Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterSelectQuery.cpp | 29 +++++-------------- ...BY_optimize_aggregation_in_order.reference | 13 +++++++++ ...RDERY_BY_optimize_aggregation_in_order.sql | 13 +++++++++ 3 files changed, 34 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference create mode 100644 tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5eaeecb9373..33b89536032 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1168,6 +1168,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); } + if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info) + throw Exception("InputOrderInfo is set for projection and for query", ErrorCodes::LOGICAL_ERROR); + InputOrderInfoPtr input_order_info_for_order; + if (!expressions.need_aggregate) + input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info; + if (options.to_stage > QueryProcessingStage::FetchColumns) { auto preliminary_sort = [&]() @@ -1183,10 +1189,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info : nullptr)); + executeOrder(query_plan, input_order_info_for_order); if (expressions.has_order_by && query.limitLength()) executeDistinct(query_plan, false, expressions.selected_columns, true); @@ -1311,16 +1314,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info.reset(); - } - // Now we must execute: // 1) expressions before window functions, // 2) window functions, @@ -1455,10 +1451,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info : nullptr)); + executeOrder(query_plan, input_order_info_for_order); } /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, @@ -2747,12 +2740,6 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets) { - // const auto & input_order_info = query_info.input_order_info - // ? query_info.input_order_info - // : (query_info.projection ? query_info.projection->input_order_info : nullptr); - // if (input_order_info) - // executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins"); - const Settings & settings = context->getSettingsRef(); SizeLimits limits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode); diff --git a/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference new file mode 100644 index 00000000000..0a83fa24d49 --- /dev/null +++ b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference @@ -0,0 +1,13 @@ +-- { echoOn } +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=0, optimize_read_in_order=0; +15 480 +14 450 +13 420 +12 390 +11 360 +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, optimize_read_in_order=1; +15 480 +14 450 +13 420 +12 390 +11 360 diff --git a/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql 
b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql new file mode 100644 index 00000000000..be050cc3080 --- /dev/null +++ b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql @@ -0,0 +1,13 @@ +-- Tags: no-s3-storage + +drop table if exists test_agg_proj_02302; + +create table test_agg_proj_02302 (x Int32, y Int32, PROJECTION x_plus_y (select sum(x - y), argMax(x, y) group by x + y)) ENGINE = MergeTree order by tuple() settings index_granularity = 1; +insert into test_agg_proj_02302 select intDiv(number, 2), -intDiv(number,3) - 1 from numbers(100); + +-- { echoOn } +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=0, optimize_read_in_order=0; +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, optimize_read_in_order=1; + +-- { echoOff } +drop table test_agg_proj_02302; From f69c3175af1265f2525d25a69dd76048dd831175 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 19 May 2022 10:13:44 +0000 Subject: [PATCH 264/615] Fix comments --- src/Functions/FunctionsConversion.h | 1 + src/Functions/FunctionsMiscellaneous.h | 7 ++++ src/Functions/IFunction.cpp | 5 +-- src/Functions/array/array.cpp | 1 + src/Functions/assumeNotNull.cpp | 10 +++++- src/Functions/indexHint.cpp | 2 -- src/Functions/map.cpp | 1 + src/Functions/materialize.h | 5 --- src/Functions/tuple.cpp | 1 + ...4_nothing_arguments_in_functions.reference | 24 +++++--------- .../02294_nothing_arguments_in_functions.sql | 33 +++++++++---------- ...ng_arguments_in_functions_errors.reference | 3 ++ ...4_nothing_arguments_in_functions_errors.sh | 10 ++++++ 13 files changed, 60 insertions(+), 43 deletions(-) create mode 100644 
tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.reference create mode 100755 tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.sh diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index f8e8db5a0e9..bffc15cdc57 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2516,6 +2516,7 @@ protected: } bool useDefaultImplementationForNulls() const override { return false; } + /// CAST(Nothing, T) -> T bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } diff --git a/src/Functions/FunctionsMiscellaneous.h b/src/Functions/FunctionsMiscellaneous.h index ff27b0cc518..49da8f49c82 100644 --- a/src/Functions/FunctionsMiscellaneous.h +++ b/src/Functions/FunctionsMiscellaneous.h @@ -51,6 +51,9 @@ public: } bool useDefaultImplementationForNulls() const override { return false; } + /// It's possible if expression_actions contains function that don't use + /// default implementation for Nothing. 
+ /// Example: arrayMap(x -> CAST(x, 'UInt8'), []); bool useDefaultImplementationForNothing() const override { return false; } private: @@ -119,6 +122,9 @@ public: String getName() const override { return "FunctionCapture"; } bool useDefaultImplementationForNulls() const override { return false; } + /// It's possible if expression_actions contains function that don't use + /// default implementation for Nothing and one of captured columns can be Nothing + /// Example: SELECT arrayMap(x -> [x, arrayElement(y, 0)], []), [] as y bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } @@ -249,6 +255,7 @@ public: String getName() const override { return name; } bool useDefaultImplementationForNulls() const override { return false; } + /// See comment in ExecutableFunctionCapture. bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override { return return_type; } diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 453c31302a0..cb03fdea1d1 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -225,8 +225,9 @@ ColumnPtr IExecutableFunction::defaultImplementationForNothing( getName(), result_type->getName()); - return ColumnConst::create(ColumnNothing::create(1), input_rows_count); - + if (input_rows_count > 0) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create non-empty column with type Nothing"); + return ColumnNothing::create(0); } ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns( diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 4ef530e9c88..b0a7daac522 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -20,6 +20,7 @@ public: } bool 
useDefaultImplementationForNulls() const override { return false; } + /// array(..., Nothing, ...) -> Array(..., Nothing, ...) bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/src/Functions/assumeNotNull.cpp b/src/Functions/assumeNotNull.cpp index 0fd1c08f855..8f999af9ef0 100644 --- a/src/Functions/assumeNotNull.cpp +++ b/src/Functions/assumeNotNull.cpp @@ -7,6 +7,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + namespace { @@ -33,7 +39,6 @@ public: size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } @@ -46,6 +51,9 @@ public: { const ColumnPtr & col = arguments[0].column; + if (arguments[0].type->onlyNull() && !col->empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create non-empty column with type Nothing"); + if (const auto * nullable_col = checkAndGetColumn(*col)) return nullable_col->getNestedColumnPtr(); else diff --git a/src/Functions/indexHint.cpp b/src/Functions/indexHint.cpp index 1f3dd23cc31..bb38a56cf27 100644 --- a/src/Functions/indexHint.cpp +++ b/src/Functions/indexHint.cpp @@ -39,8 +39,6 @@ public: bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForNothing() const override { return false; } - bool isSuitableForConstantFolding() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 28c949b5dc3..8c891fdec81 100644 --- 
a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -65,6 +65,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForNulls() const override { return false; } + /// map(..., Nothing) -> Map(..., Nothing) bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/src/Functions/materialize.h b/src/Functions/materialize.h index e71e463066e..aab4e5bdbdf 100644 --- a/src/Functions/materialize.h +++ b/src/Functions/materialize.h @@ -23,11 +23,6 @@ public: return false; } - bool useDefaultImplementationForNothing() const override - { - return false; - } - /// Get the function name. String getName() const override { diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index 5a06ac21be4..4238b12157a 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -52,6 +52,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } + /// tuple(..., Nothing, ...) -> Tuple(..., Nothing, ...) 
bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference index 9360b9a1922..665931efb71 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.reference @@ -4,32 +4,26 @@ Array(Nothing) Array(Nothing) [] Array(Nothing) -Array(Nothing) -Array(Nothing) [] Array(Nothing) +Array(String) Array(Nothing) Array(Nothing) - -Nothing -Const(Nothing) -Nothing -Const(Nothing) +Array(Array(Nothing)) +Array(Array(Nothing)) +Array(Map(UInt8, Nothing)) +Array(Map(UInt8, Nothing)) +Array(Tuple(Nothing)) +Array(Tuple(UInt8, Nothing)) +Nothing +Nothing Nothing Nothing -Array(Nothing) -Const(Array(Nothing)) Array(Nothing) Array(Nothing) Map(UInt8, Nothing) -Const(Map(UInt8, Nothing)) -Map(UInt8, Nothing) Map(UInt8, Nothing) Tuple(UInt8, Nothing) -Const(Tuple(UInt8, Nothing)) -Tuple(UInt8, Nothing) Tuple(UInt8, Nothing) Nothing -Const(Nothing) -Nothing Nothing diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql index 732664e081f..4406a05df0c 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -5,39 +5,36 @@ select toTypeName(arrayMap((x, y) -> x + y, [], [])); select arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)')); select toTypeName(arrayMap((x, y) -> x + y, [], CAST([], 'Array(Int32)'))); -select toTypeName(arrayMap(x -> 2 * x, [assumeNotNull(NULL)])); -select toColumnTypeName(arrayMap(x -> 2 * x, [assumeNotNull(NULL)])); - select arrayFilter(x -> 2 * x < 0, []); select toTypeName(arrayFilter(x -> 2 * x < 0, [])); -select 
toTypeName(arrayFilter(x -> 2 * x < 0, [assumeNotNull(NULL)])); -select toColumnTypeName(arrayFilter(x -> 2 * x < 0, [assumeNotNull(NULL)])); -select CAST(assumeNotNull(NULL), 'String'); -select toTypeName(toInt32(assumeNotNull(NULL))); -select toColumnTypeName(toInt32(assumeNotNull(NULL))); +select toTypeName(arrayMap(x -> CAST(x, 'String'), [])); +select toTypeName(arrayMap(x -> toInt32(x), [])); +select toColumnTypeName(arrayMap(x -> toInt32(x), [])); + +select toTypeName(arrayMap(x -> [x], [])); +select toColumnTypeName(arrayMap(x -> [x], [])); + +select toTypeName(arrayMap(x ->map(1, x), [])); +select toColumnTypeName(arrayMap(x -> map(1, x), [])); + +select toTypeName(arrayMap(x ->tuple(x), [])); +select toColumnTypeName(arrayMap(x -> tuple(1, x), [])); + +select toTypeName(toInt32(assumeNotNull(materialize(NULL)))); +select toColumnTypeName(toInt32(assumeNotNull(materialize(NULL)))); -select toTypeName(assumeNotNull(NULL)); -select toColumnTypeName(assumeNotNull(NULL)); select toTypeName(assumeNotNull(materialize(NULL))); select toColumnTypeName(assumeNotNull(materialize(NULL))); -select toTypeName([assumeNotNull(NULL)]); -select toColumnTypeName([assumeNotNull(NULL)]); select toTypeName([assumeNotNull(materialize(NULL))]); select toColumnTypeName([assumeNotNull(materialize(NULL))]); -select toTypeName(map(1, assumeNotNull(NULL))); -select toColumnTypeName(map(1, assumeNotNull(NULL))); select toTypeName(map(1, assumeNotNull(materialize(NULL)))); select toColumnTypeName(map(1, assumeNotNull(materialize(NULL)))); -select toTypeName(tuple(1, assumeNotNull(NULL))); -select toColumnTypeName(tuple(1, assumeNotNull(NULL))); select toTypeName(tuple(1, assumeNotNull(materialize(NULL)))); select toColumnTypeName(tuple(1, assumeNotNull(materialize(NULL)))); -select toTypeName(assumeNotNull(NULL) * 2); -select toColumnTypeName(assumeNotNull(NULL) * 2); select toTypeName(assumeNotNull(materialize(NULL)) * 2); select toColumnTypeName(assumeNotNull(materialize(NULL)) * 2); 
diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.reference b/tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.reference new file mode 100644 index 00000000000..0eabe367130 --- /dev/null +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.reference @@ -0,0 +1,3 @@ +OK +OK +OK diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.sh b/tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.sh new file mode 100755 index 00000000000..931985340c2 --- /dev/null +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions_errors.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "SELECT assumeNotNull(NULL)" 2>&1 | grep -q "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" +$CLICKHOUSE_LOCAL -q "SELECT assumeNotNull(materialize(NULL))" 2>&1 | grep -q "ILLEGAL_TYPE_OF_ARGUMENT" && echo "OK" || echo "FAIL" +$CLICKHOUSE_LOCAL -q "SELECT assumeNotNull(materialize(NULL)) from numbers(10)" 2>&1 | grep -q "ILLEGAL_TYPE_OF_ARGUMENT" && echo "OK" || echo "FAIL" + From f787dc7097b9da93fc59679548cc4704a4279f85 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 19 May 2022 13:24:48 +0300 Subject: [PATCH 265/615] Revert "Fix mutations in tables with columns of type `Object`" --- src/Interpreters/MutationsInterpreter.cpp | 6 ++---- src/Storages/MergeTree/MergeTask.cpp | 1 + src/Storages/MergeTree/MutateTask.cpp | 15 +++++-------- .../MergeTree/StorageFromMergeTreeDataPart.h | 17 --------------- .../01825_type_json_mutations.reference | 7 ------- .../0_stateless/01825_type_json_mutations.sql | 21 ------------------- 6 files changed, 8 insertions(+), 59 deletions(-) delete mode 100644 tests/queries/0_stateless/01825_type_json_mutations.reference delete mode 100644 
tests/queries/0_stateless/01825_type_json_mutations.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index a55de34efbc..99032dd9f10 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -758,9 +758,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & prepared_stages, bool dry_run) { - auto storage_snapshot = storage->getStorageSnapshot(metadata_snapshot, context); - auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); - auto all_columns = storage_snapshot->getColumns(options); + NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAllPhysical(); /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) @@ -804,7 +802,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// e.g. 
ALTER referencing the same table in scalar subquery bool execute_scalar_subqueries = !dry_run; auto syntax_result = TreeRewriter(context).analyze( - all_asts, all_columns, storage, storage_snapshot, + all_asts, all_columns, storage, storage->getStorageSnapshot(metadata_snapshot, context), false, true, execute_scalar_subqueries); if (execute_scalar_subqueries && context->hasQueryContext()) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 8732a3ed3e5..58bffaab34b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -149,6 +149,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->merging_columns, global_ctx->merging_column_names); + auto local_single_disk_volume = std::make_shared("volume_" + global_ctx->future_part->name, ctx->disk, 0); global_ctx->new_data_part = global_ctx->data->createPart( global_ctx->future_part->name, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index fb8f4ba0518..c41a2d3d52c 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -624,9 +624,7 @@ struct MutationContext FutureMergedMutatedPartPtr future_part; MergeTreeData::DataPartPtr source_part; - StoragePtr storage_from_source_part; StorageMetadataPtr metadata_snapshot; - MutationCommandsConstPtr commands; time_t time_of_mutation; ContextPtr context; @@ -1369,11 +1367,6 @@ MutateTask::MutateTask( ctx->space_reservation = space_reservation_; ctx->storage_columns = metadata_snapshot_->getColumns().getAllPhysical(); ctx->txn = txn; - ctx->source_part = ctx->future_part->parts[0]; - ctx->storage_from_source_part = std::make_shared(ctx->source_part); - - auto storage_snapshot = ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_); - extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false); } @@ -1412,6 +1405,8 @@ bool 
MutateTask::prepare() "This is a bug.", toString(ctx->future_part->parts.size())); ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); + ctx->source_part = ctx->future_part->parts[0]; + auto storage_from_source_part = std::make_shared(ctx->source_part); auto context_for_reading = Context::createCopy(ctx->context); context_for_reading->setSetting("max_streams_to_max_threads_ratio", 1); @@ -1422,13 +1417,13 @@ bool MutateTask::prepare() for (const auto & command : *ctx->commands) { - if (command.partition == nullptr || ctx->source_part->info.partition_id == ctx->data->getPartitionIDFromQuery( + if (command.partition == nullptr || ctx->future_part->parts[0]->info.partition_id == ctx->data->getPartitionIDFromQuery( command.partition, context_for_reading)) ctx->commands_for_part.emplace_back(command); } if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) + storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) { LOG_TRACE(ctx->log, "Part {} doesn't change up to mutation version {}", ctx->source_part->name, ctx->future_part->part_info.mutation); promise.set_value(ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_clone_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false)); @@ -1446,7 +1441,7 @@ bool MutateTask::prepare() if (!ctx->for_interpreter.empty()) { ctx->interpreter = std::make_unique( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); + storage_from_source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutation_kind = 
ctx->interpreter->getMutationKind(); diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index a9bcb353b84..5be20c9a2d5 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -37,20 +36,6 @@ public: String getName() const override { return "FromMergeTreeDataPart"; } - StorageSnapshotPtr getStorageSnapshot( - const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override - { - const auto & storage_columns = metadata_snapshot->getColumns(); - if (!hasObjectColumns(storage_columns)) - return std::make_shared(*this, metadata_snapshot); - - auto object_columns = getObjectColumns( - parts.begin(), parts.end(), - storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); - - return std::make_shared(*this, metadata_snapshot, object_columns); - } - Pipe read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -80,8 +65,6 @@ public: bool supportsIndexForIn() const override { return true; } - bool supportsDynamicSubcolumns() const override { return true; } - bool mayBenefitFromIndexForIn( const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override { diff --git a/tests/queries/0_stateless/01825_type_json_mutations.reference b/tests/queries/0_stateless/01825_type_json_mutations.reference deleted file mode 100644 index da7c278497e..00000000000 --- a/tests/queries/0_stateless/01825_type_json_mutations.reference +++ /dev/null @@ -1,7 +0,0 @@ -1 q (1,2,[('aaa'),('bbb')]) -2 w (3,4,[('ccc')]) -3 e (5,6,[]) -1 q (1,2,[('aaa'),('bbb')]) -3 e (5,6,[]) -1 foo -3 foo diff --git a/tests/queries/0_stateless/01825_type_json_mutations.sql b/tests/queries/0_stateless/01825_type_json_mutations.sql deleted file mode 100644 index 
a16710bdbf7..00000000000 --- a/tests/queries/0_stateless/01825_type_json_mutations.sql +++ /dev/null @@ -1,21 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS t_json_mutations; - -SET allow_experimental_object_type = 1; -SET output_format_json_named_tuples_as_objects = 1; -SET mutations_sync = 2; - -CREATE TABLE t_json_mutations(id UInt32, s String, obj JSON) ENGINE = MergeTree ORDER BY id; - -INSERT INTO t_json_mutations VALUES (1, 'q', '{"k1": 1, "k2": 2, "k3": [{"k4": "aaa"}, {"k4": "bbb"}]}'); -INSERT INTO t_json_mutations VALUES (2, 'w', '{"k1": 3, "k2": 4, "k3": [{"k4": "ccc"}]}'); -INSERT INTO t_json_mutations VALUES (3, 'e', '{"k1": 5, "k2": 6}'); - -SELECT * FROM t_json_mutations ORDER BY id; -ALTER TABLE t_json_mutations DELETE WHERE id = 2; -SELECT * FROM t_json_mutations ORDER BY id; -ALTER TABLE t_json_mutations DROP COLUMN s, DROP COLUMN obj, ADD COLUMN t String DEFAULT 'foo'; -SELECT * FROM t_json_mutations ORDER BY id; - -DROP TABLE t_json_mutations; From 468954cc87db08516aae731bb4d68ab7293a03c2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 19 May 2022 12:52:33 +0200 Subject: [PATCH 266/615] Remove redundant include --- src/Storages/ColumnsDescription.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index dcfcd81f6f7..e11c2477572 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -31,7 +31,6 @@ #include #include -#include namespace DB { From 0092b3e6b5ac87d62d62ddcf23bc84dc9e9f12af Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 19 May 2022 12:52:52 +0200 Subject: [PATCH 267/615] Mark test as no-parallel --- .../queries/0_stateless/02302_defaults_in_columnar_formats.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql 
b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql index 0262199b713..5946f2d37e5 100644 --- a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql +++ b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel insert into function file(data_02302.parquet) select 1 as x settings engine_file_truncate_on_insert=1; select * from file(data_02302.parquet, auto, 'x UInt8, y default 42, z default x + y') settings input_format_parquet_allow_missing_columns=1; From 6e158a799642ef2b347d775b3bb3b79fb6c5d7ea Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 19 May 2022 12:53:40 +0200 Subject: [PATCH 268/615] Mark test as no-parallel --- .../0_stateless/02304_orc_arrow_parquet_string_as_string.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql index e354f303c7f..2d971bba9db 100644 --- a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql +++ b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel insert into function file(data_02304.parquet) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_parquet_string_as_string=1; desc file(data_02304.parquet); From 456444eadb04285a0d7bf0266bb73dad3f840b87 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 26 Apr 2022 17:45:43 +0300 Subject: [PATCH 269/615] Add storage_configuration example into config.xml Signed-off-by: Azat Khuzhin --- programs/server/config.xml | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/programs/server/config.xml b/programs/server/config.xml index b76a677eb92..5cc6861b0b2 100644 --- 
a/programs/server/config.xml +++ b/programs/server/config.xml @@ -365,6 +365,58 @@ /var/lib/clickhouse/ + + + + /var/lib/clickhouse/tmp/ From 590dfe6ae59c3d33d72e59ae0cc9468c7b91d80a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 26 Apr 2022 17:48:29 +0300 Subject: [PATCH 270/615] doc: document perform_ttl_move_on_insert Signed-off-by: Azat Khuzhin --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 1029cceb28a..add939f8304 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -695,6 +695,7 @@ Tags: - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume. - `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. - `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. 
By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). Cofiguration examples: From dad31fe5a097777902606a9f823756dccd0fdfa1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 26 Apr 2022 17:49:02 +0300 Subject: [PATCH 271/615] doc/mergetree: fix aligment in documentation Signed-off-by: Azat Khuzhin --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index add939f8304..c5ff98ef903 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -725,7 +725,7 @@ Cofiguration examples: 0.2 - +

jbod1 From 9d734bd6779e5b9d0b87374ee2cddbd08ee4a3a4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 3 May 2022 12:21:28 +0300 Subject: [PATCH 272/615] Disks: Remove mentions about RAID1 Signed-off-by: Azat Khuzhin --- src/Disks/IVolume.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index 41260cac2e9..a13d8f43331 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -11,7 +11,6 @@ namespace DB enum class VolumeType { JBOD, - RAID1, SINGLE_DISK, UNKNOWN }; From 4bc849b9f0e4e987dcd0f13ec19a81f24902cdf0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 18 May 2022 16:49:17 +0300 Subject: [PATCH 273/615] Disks: Remove unused src/Disks/SingleDiskVolume.cpp Signed-off-by: Azat Khuzhin --- src/Disks/SingleDiskVolume.cpp | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 src/Disks/SingleDiskVolume.cpp diff --git a/src/Disks/SingleDiskVolume.cpp b/src/Disks/SingleDiskVolume.cpp deleted file mode 100644 index 47140407026..00000000000 --- a/src/Disks/SingleDiskVolume.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include - -namespace DB -{ - -} From ba26b3cf4c982f681f7f9782bba2662f97620835 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 26 Apr 2022 17:58:09 +0300 Subject: [PATCH 274/615] Implement lead_used load balancing algorithm for disks inside volume v2: rebase on top removed raid1 Signed-off-by: Azat Khuzhin --- .../mergetree-family/mergetree.md | 2 + programs/server/config.xml | 1 + src/Disks/IVolume.cpp | 14 ++ src/Disks/IVolume.h | 22 ++- src/Disks/StoragePolicy.cpp | 7 +- src/Disks/VolumeJBOD.cpp | 67 +++++++-- src/Disks/VolumeJBOD.h | 21 ++- .../test_jbod_load_balancing/__init__.py | 0 .../config.d/storage_configuration.xml | 39 +++++ .../test_jbod_load_balancing/test.py | 136 ++++++++++++++++++ 10 files changed, 290 insertions(+), 19 deletions(-) create mode 100644 tests/integration/test_jbod_load_balancing/__init__.py create mode 100644 
tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml create mode 100644 tests/integration/test_jbod_load_balancing/test.py diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index c5ff98ef903..d59b07b5dd6 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -669,6 +669,7 @@ Storage policies configuration markup: disk_name_from_disks_configuration 1073741824 + round_robin @@ -696,6 +697,7 @@ Tags: - `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. - `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. - `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). +- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`. Cofiguration examples: diff --git a/programs/server/config.xml b/programs/server/config.xml index 5cc6861b0b2..203684a9e00 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -408,6 +408,7 @@ true false + round_robin
0.2 diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 85568fdd05b..09f42cc5467 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -7,17 +7,31 @@ namespace DB { + namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; } + +VolumeLoadBalancing parseVolumeLoadBalancing(const String & config) +{ + if (config == "round_robin") + return VolumeLoadBalancing::ROUND_ROBIN; + if (config == "least_used") + return VolumeLoadBalancing::LEAST_USED; + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "'{}' is not valid load_balancing value", config); +} + + IVolume::IVolume( String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disk_selector) : name(std::move(name_)) + , load_balancing(parseVolumeLoadBalancing(config.getString(config_prefix + ".load_balancing", "round_robin"))) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index a13d8f43331..26d4c96f481 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -15,6 +15,14 @@ enum class VolumeType UNKNOWN }; +enum class VolumeLoadBalancing +{ + ROUND_ROBIN, + LEAST_USED, +}; + +VolumeLoadBalancing parseVolumeLoadBalancing(const String & config); + class IVolume; using VolumePtr = std::shared_ptr; using Volumes = std::vector; @@ -33,11 +41,19 @@ using Volumes = std::vector; class IVolume : public Space { public: - IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0, bool perform_ttl_move_on_insert_ = true) + /// This constructor is only for: + /// - SingleDiskVolume + /// From createVolumeFromReservation(). 
+ IVolume(String name_, + Disks disks_, + size_t max_data_part_size_ = 0, + bool perform_ttl_move_on_insert_ = true, + VolumeLoadBalancing load_balancing_ = VolumeLoadBalancing::ROUND_ROBIN) : disks(std::move(disks_)) , name(name_) , max_data_part_size(max_data_part_size_) , perform_ttl_move_on_insert(perform_ttl_move_on_insert_) + , load_balancing(load_balancing_) { } @@ -78,6 +94,10 @@ public: /// Should a new data part be synchronously moved to a volume according to ttl on insert /// or move this part in background task asynchronously after insert. bool perform_ttl_move_on_insert = true; + /// Load balancing, one of: + /// - ROUND_ROBIN + /// - LEAST_USED + const VolumeLoadBalancing load_balancing; }; } diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 11f53d10fb4..3dd60ac02d4 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -63,7 +63,12 @@ StoragePolicy::StoragePolicy( if (volumes.empty() && name == DEFAULT_STORAGE_POLICY_NAME) { - auto default_volume = std::make_shared(DEFAULT_VOLUME_NAME, std::vector{disks->get(DEFAULT_DISK_NAME)}, 0, false); + auto default_volume = std::make_shared(DEFAULT_VOLUME_NAME, + std::vector{disks->get(DEFAULT_DISK_NAME)}, + /* max_data_part_size_= */ 0, + /* are_merges_avoided_= */ false, + /* perform_ttl_move_on_insert_= */ true, + VolumeLoadBalancing::ROUND_ROBIN); volumes.emplace_back(std::move(default_volume)); } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index e0f7dfc8231..f202dda03a2 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -19,15 +19,18 @@ VolumeJBOD::VolumeJBOD( const String & config_prefix, DiskSelectorPtr disk_selector) : IVolume(name_, config, config_prefix, disk_selector) + , disks_by_size(disks.begin(), disks.end()) { Poco::Logger * logger = &Poco::Logger::get("StorageConfiguration"); auto has_max_bytes = config.has(config_prefix + ".max_data_part_size_bytes"); auto has_max_ratio = config.has(config_prefix + 
".max_data_part_size_ratio"); if (has_max_bytes && has_max_ratio) + { throw Exception( "Only one of 'max_data_part_size_bytes' and 'max_data_part_size_ratio' should be specified.", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + } if (has_max_bytes) { @@ -47,12 +50,20 @@ VolumeJBOD::VolumeJBOD( } max_data_part_size = static_cast(sum_size * ratio / disks.size()); for (size_t i = 0; i < disks.size(); ++i) + { if (sizes[i] < max_data_part_size) - LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + { + LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", + backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + } + } } static constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u; if (max_data_part_size != 0 && max_data_part_size < MIN_PART_SIZE) - LOG_WARNING(logger, "Volume {} max_data_part_size is too low ({} < {})", backQuote(name), ReadableSize(max_data_part_size), ReadableSize(MIN_PART_SIZE)); + { + LOG_WARNING(logger, "Volume {} max_data_part_size is too low ({} < {})", + backQuote(name), ReadableSize(max_data_part_size), ReadableSize(MIN_PART_SIZE)); + } /// Default value is 'true' due to backward compatibility. 
perform_ttl_move_on_insert = config.getBool(config_prefix + ".perform_ttl_move_on_insert", true); @@ -72,31 +83,61 @@ VolumeJBOD::VolumeJBOD(const VolumeJBOD & volume_jbod, DiskPtr VolumeJBOD::getDisk(size_t /* index */) const { - size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); - size_t index = start_from % disks.size(); - return disks[index]; + switch (load_balancing) + { + case VolumeLoadBalancing::ROUND_ROBIN: + { + size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); + size_t index = start_from % disks.size(); + return disks[index]; + } + case VolumeLoadBalancing::LEAST_USED: + { + std::lock_guard lock(mutex); + return disks_by_size.top(); + } + } + __builtin_unreachable(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) { /// This volume can not store data which size is greater than `max_data_part_size` /// to ensure that parts of size greater than that go to another volume(s). - if (max_data_part_size != 0 && bytes > max_data_part_size) return {}; - size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); - size_t disks_num = disks.size(); - for (size_t i = 0; i < disks_num; ++i) + switch (load_balancing) { - size_t index = (start_from + i) % disks_num; + case VolumeLoadBalancing::ROUND_ROBIN: + { + size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); + size_t disks_num = disks.size(); + for (size_t i = 0; i < disks_num; ++i) + { + size_t index = (start_from + i) % disks_num; - auto reservation = disks[index]->reserve(bytes); + auto reservation = disks[index]->reserve(bytes); + + if (reservation) + return reservation; + } + return {}; + } + case VolumeLoadBalancing::LEAST_USED: + { + std::lock_guard lock(mutex); + + DiskPtr disk = disks_by_size.top(); + ReservationPtr reservation = disk->reserve(bytes); + + disks_by_size.pop(); + disks_by_size.push(disk); - if (reservation) return reservation; + } } - return {}; + __builtin_unreachable(); } bool VolumeJBOD::areMergesAvoided() 
const diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index 621125f1109..5c9d2921ca5 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -22,8 +22,8 @@ using VolumesJBOD = std::vector; class VolumeJBOD : public IVolume { public: - VolumeJBOD(String name_, Disks disks_, UInt64 max_data_part_size_, bool are_merges_avoided_) - : IVolume(name_, disks_, max_data_part_size_) + VolumeJBOD(String name_, Disks disks_, UInt64 max_data_part_size_, bool are_merges_avoided_, bool perform_ttl_move_on_insert_, VolumeLoadBalancing load_balancing_) + : IVolume(name_, disks_, max_data_part_size_, perform_ttl_move_on_insert_, load_balancing_) , are_merges_avoided(are_merges_avoided_) { } @@ -44,7 +44,8 @@ public: VolumeType getType() const override { return VolumeType::JBOD; } - /// Always returns next disk (round-robin), ignores argument. + /// Returns disk based on the load balancing algorithm (round-robin, or least-used), + /// ignores @index argument. /// /// - Used with policy for temporary data /// - Ignores all limitations @@ -63,8 +64,20 @@ public: bool are_merges_avoided = true; private: - /// Index of last used disk. + struct DiskBySize + { + bool operator()(const DiskPtr & lhs, const DiskPtr & rhs) const + { + /// TODO: avoid getAvailableSpace() calls + return lhs->getUnreservedSpace() < rhs->getUnreservedSpace(); + } + }; + + mutable std::mutex mutex; + /// Index of last used disk, for load_balancing=round_robin mutable std::atomic last_used = 0; + /// Priority queue of disks sorted by size, for load_balancing=least_used + mutable std::priority_queue, DiskBySize> disks_by_size; /// True if parts on this volume participate in merges according to START/STOP MERGES ON VOLUME. 
std::atomic> are_merges_avoided_user_override{std::nullopt}; diff --git a/tests/integration/test_jbod_load_balancing/__init__.py b/tests/integration/test_jbod_load_balancing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml b/tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml new file mode 100644 index 00000000000..529eb1bc0b5 --- /dev/null +++ b/tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml @@ -0,0 +1,39 @@ + + + + + /jbod1/ + + + /jbod2/ + + + /jbod3/ + + + + + + + jbod1 + jbod2 + jbod3 + + + + + + + + + jbod1 + jbod2 + jbod3 + + least_used + + + + + + diff --git a/tests/integration/test_jbod_load_balancing/test.py b/tests/integration/test_jbod_load_balancing/test.py new file mode 100644 index 00000000000..9c62d1bbdfc --- /dev/null +++ b/tests/integration/test_jbod_load_balancing/test.py @@ -0,0 +1,136 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/storage_configuration.xml", + ], + tmpfs=[ + "/jbod1:size=100M", + "/jbod2:size=200M", + "/jbod3:size=300M", + ], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_jbod_load_balancing_round_robin(start_cluster): + try: + node.query( + """ + CREATE TABLE data_round_robin (p UInt8) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy = 'jbod_round_robin'; + + SYSTEM STOP MERGES data_round_robin; + + INSERT INTO data_round_robin SELECT * FROM numbers(10); + INSERT INTO data_round_robin SELECT * FROM numbers(10); + INSERT INTO data_round_robin SELECT * FROM numbers(10); + INSERT INTO data_round_robin SELECT * FROM 
numbers(10); + """ + ) + + parts = node.query( + """ + SELECT count(), disk_name + FROM system.parts + WHERE table = 'data_round_robin' + GROUP BY disk_name + ORDER BY disk_name + """ + ) + parts = [l.split("\t") for l in parts.strip().split("\n")] + assert parts == [ + ["2", "jbod1"], + ["1", "jbod2"], + ["1", "jbod3"], + ] + finally: + node.query("DROP TABLE IF EXISTS data_round_robin SYNC") + + +def test_jbod_load_balancing_least_used(start_cluster): + try: + node.query( + """ + CREATE TABLE data_least_used (p UInt8) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy = 'jbod_least_used'; + + SYSTEM STOP MERGES data_least_used; + + INSERT INTO data_least_used SELECT * FROM numbers(10); + INSERT INTO data_least_used SELECT * FROM numbers(10); + INSERT INTO data_least_used SELECT * FROM numbers(10); + INSERT INTO data_least_used SELECT * FROM numbers(10); + """ + ) + + parts = node.query( + """ + SELECT count(), disk_name + FROM system.parts + WHERE table = 'data_least_used' + GROUP BY disk_name + ORDER BY disk_name + """ + ) + parts = [l.split("\t") for l in parts.strip().split("\n")] + assert parts == [ + ["4", "jbod3"], + ] + finally: + node.query("DROP TABLE IF EXISTS data_least_used SYNC") + + +def test_jbod_load_balancing_least_used_next_disk(start_cluster): + try: + node.query( + """ + CREATE TABLE data_least_used_next_disk + ( + s String CODEC(NONE) + ) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy = 'jbod_least_used'; + + SYSTEM STOP MERGES data_least_used_next_disk; + + -- 100MiB each part, 3 parts in total + INSERT INTO data_least_used_next_disk SELECT repeat('a', 100) FROM numbers(3e6) SETTINGS max_block_size='1Mi'; + """ + ) + + parts = node.query( + """ + SELECT count(), disk_name + FROM system.parts + WHERE table = 'data_least_used_next_disk' + GROUP BY disk_name + ORDER BY disk_name + """ + ) + parts = [l.split("\t") for l in parts.strip().split("\n")] + assert parts == [ + ["1", "jbod2"], + ["2", "jbod3"], + ] + 
finally: + node.query("DROP TABLE IF EXISTS data_least_used_next_disk SYNC") From 1d98913f90379345e80e75c746b8ffa34e2cbb0f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 18 May 2022 16:54:51 +0300 Subject: [PATCH 275/615] Disks: Reduce number of statfs() calls for least_used disk load balancing policy Signed-off-by: Azat Khuzhin --- src/Disks/VolumeJBOD.cpp | 8 ++++---- src/Disks/VolumeJBOD.h | 25 ++++++++++++++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index f202dda03a2..401822fc901 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -94,7 +94,7 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const case VolumeLoadBalancing::LEAST_USED: { std::lock_guard lock(mutex); - return disks_by_size.top(); + return disks_by_size.top().disk; } } __builtin_unreachable(); @@ -128,10 +128,10 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes) { std::lock_guard lock(mutex); - DiskPtr disk = disks_by_size.top(); - ReservationPtr reservation = disk->reserve(bytes); - + DiskWithSize disk = disks_by_size.top(); disks_by_size.pop(); + + ReservationPtr reservation = disk.reserve(bytes); disks_by_size.push(disk); return reservation; diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index 5c9d2921ca5..0fb80422d55 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -64,12 +64,27 @@ public: bool are_merges_avoided = true; private: - struct DiskBySize + struct DiskWithSize { - bool operator()(const DiskPtr & lhs, const DiskPtr & rhs) const + DiskPtr disk; + uint64_t free_size = 0; + + DiskWithSize(DiskPtr disk_) + : disk(disk_) + , free_size(disk->getUnreservedSpace()) + {} + + bool operator<(const DiskWithSize & rhs) const { - /// TODO: avoid getAvailableSpace() calls - return lhs->getUnreservedSpace() < rhs->getUnreservedSpace(); + return free_size < rhs.free_size; + } + + ReservationPtr reserve(uint64_t bytes) + { + ReservationPtr reservation = 
disk->reserve(bytes); + if (reservation) + free_size -= bytes; + return reservation; } }; @@ -77,7 +92,7 @@ private: /// Index of last used disk, for load_balancing=round_robin mutable std::atomic last_used = 0; /// Priority queue of disks sorted by size, for load_balancing=least_used - mutable std::priority_queue, DiskBySize> disks_by_size; + mutable std::priority_queue disks_by_size; /// True if parts on this volume participate in merges according to START/STOP MERGES ON VOLUME. std::atomic> are_merges_avoided_user_override{std::nullopt}; From 757894b10fceab61f7a779e94133ae5cb5dace33 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 19 May 2022 13:24:27 +0300 Subject: [PATCH 276/615] Disks: Introduce IReservation::getUnreservedSpace() Signed-off-by: Azat Khuzhin --- src/Disks/DiskDecorator.h | 1 + src/Disks/DiskEncrypted.cpp | 1 + src/Disks/DiskLocal.cpp | 34 ++++++++++++++++++++++------------ src/Disks/DiskLocal.h | 2 +- src/Disks/IDisk.h | 4 ++++ src/Disks/IDiskRemote.cpp | 22 ++++++++++++++-------- src/Disks/IDiskRemote.h | 12 +++++++++--- 7 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index d707eb3e51d..325d8b6704b 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -96,6 +96,7 @@ class ReservationDelegate : public IReservation public: ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_) : delegate(std::move(delegate_)), wrapper(wrapper_) { } UInt64 getSize() const override { return delegate->getSize(); } + UInt64 getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } DiskPtr getDisk(size_t) const override { return wrapper; } Disks getDisks() const override { return {wrapper}; } void update(UInt64 new_size) override { delegate->update(new_size); } diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 4ac59af95ab..8edb00e5a67 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ 
-182,6 +182,7 @@ public: } UInt64 getSize() const override { return reservation->getSize(); } + UInt64 getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } DiskPtr getDisk(size_t i) const override { diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 8abf0b24782..eedf0f12f49 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -112,12 +112,15 @@ std::optional fileSizeSafe(const fs::path & path) class DiskLocalReservation : public IReservation { public: - DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) - { - } + DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_, UInt64 unreserved_space_) + : disk(disk_) + , size(size_) + , unreserved_space(unreserved_space_) + , metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + {} UInt64 getSize() const override { return size; } + UInt64 getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override { @@ -165,6 +168,7 @@ public: private: DiskLocalPtr disk; UInt64 size; + UInt64 unreserved_space; CurrentMetrics::Increment metric_increment; }; @@ -201,32 +205,38 @@ private: ReservationPtr DiskLocal::reserve(UInt64 bytes) { - if (!tryReserve(bytes)) + auto unreserved_space = tryReserve(bytes); + if (!unreserved_space.has_value()) return {}; - return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); + return std::make_unique( + std::static_pointer_cast(shared_from_this()), + bytes, unreserved_space.value()); } -bool DiskLocal::tryReserve(UInt64 bytes) +std::optional DiskLocal::tryReserve(UInt64 bytes) { std::lock_guard lock(DiskLocal::reservation_mutex); + + UInt64 available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (bytes == 0) { LOG_DEBUG(log, "Reserving 0 bytes on disk {}", 
backQuote(name)); ++reservation_count; - return true; + return {unreserved_space}; } - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return true; + return {unreserved_space - bytes}; } - return false; + + return {}; } static UInt64 getTotalSpaceByName(const String & name, const String & disk_path, UInt64 keep_free_space_bytes) diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61faccbe2a5..62b03e7b2ed 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -121,7 +121,7 @@ public: bool canWrite() const noexcept; private: - bool tryReserve(UInt64 bytes); + std::optional tryReserve(UInt64 bytes); /// Setup disk for healthy check. Returns true if it's read-write, false if read-only. /// Throw exception if it's not possible to setup necessary files and directories. diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index c4578d51b6e..440cf31682d 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -392,6 +392,10 @@ public: /// Get reservation size. virtual UInt64 getSize() const = 0; + /// Space available for reservation + /// (with this reservation already taken into account). + virtual UInt64 getUnreservedSpace() const = 0; + /// Get i-th disk where reservation take place. 
virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index d72d7004cb7..326ee88eea6 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -637,34 +637,40 @@ void IDiskRemote::createHardLink(const String & src_path, const String & dst_pat ReservationPtr IDiskRemote::reserve(UInt64 bytes) { - if (!tryReserve(bytes)) + auto unreserved_space = tryReserve(bytes); + if (!unreserved_space.has_value()) return {}; - return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); + return std::make_unique( + std::static_pointer_cast(shared_from_this()), + bytes, unreserved_space.value()); } -bool IDiskRemote::tryReserve(UInt64 bytes) +std::optional IDiskRemote::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (bytes == 0) { LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); ++reservation_count; - return true; + return {unreserved_space}; } - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return true; + return {unreserved_space - bytes}; } - return false; + + return {}; } String IDiskRemote::getUniqueId(const String & path) const diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 65bcdf3e719..96da7dc4f23 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -177,7 +177,7 @@ private: void removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove); - bool tryReserve(UInt64 bytes); + std::optional tryReserve(UInt64 bytes); UInt64 
reserved_bytes = 0; UInt64 reservation_count = 0; @@ -250,13 +250,18 @@ private: class DiskRemoteReservation final : public IReservation { public: - DiskRemoteReservation(const RemoteDiskPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + DiskRemoteReservation(const RemoteDiskPtr & disk_, UInt64 size_, UInt64 unreserved_space_) + : disk(disk_) + , size(size_) + , unreserved_space(unreserved_space_) + , metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) { } UInt64 getSize() const override { return size; } + UInt64 getUnreservedSpace() const override { return unreserved_space; } + DiskPtr getDisk(size_t i) const override; Disks getDisks() const override { return {disk}; } @@ -268,6 +273,7 @@ public: private: RemoteDiskPtr disk; UInt64 size; + UInt64 unreserved_space; CurrentMetrics::Increment metric_increment; }; From a3509912843852a2c3d7d5d0bc0e8a83c12ecea1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 19 May 2022 13:35:05 +0300 Subject: [PATCH 277/615] Disks: More accurate free space calculation for least_used Signed-off-by: Azat Khuzhin --- src/Disks/VolumeJBOD.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index 0fb80422d55..21d61e6dd8d 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -82,8 +82,9 @@ private: ReservationPtr reserve(uint64_t bytes) { ReservationPtr reservation = disk->reserve(bytes); - if (reservation) - free_size -= bytes; + /// Not just subtract bytes, but update the value, + /// since some reservations may be done directly via IDisk, or not by ClickHouse. 
+ free_size = reservation->getUnreservedSpace(); return reservation; } }; From 6338368a1c0262409b9a584b85ff26c4479f2fd7 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Thu, 19 May 2022 20:14:52 +0800 Subject: [PATCH 278/615] optimize file segment getCacheReadBuffer --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 541d03f5c20..1cb6354d38c 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -282,6 +282,20 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( } case FileSegment::State::PARTIALLY_DOWNLOADED: { + if (file_segment->getDownloadOffset() > file_offset_of_buffer_end) + { + /// segment{k} state: PARTIALLY_DOWNLOADED + /// cache: [______|___________ + /// ^ + /// download_offset (in progress) + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + auto downloader_id = file_segment->getOrSetDownloader(); if (downloader_id == file_segment->getCallerId()) { From 797edb0ae188db278c076d19dbd59f007c41d61f Mon Sep 17 00:00:00 2001 From: vxider Date: Thu, 19 May 2022 20:49:02 +0800 Subject: [PATCH 279/615] Update src/Storages/WindowView/StorageWindowView.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Storages/WindowView/StorageWindowView.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 893e0eefb86..eb95a82d210 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -465,6 +465,9 @@ void StorageWindowView::alter( auto old_inner_table_id = inner_table_id; modifying_query = true; + SCOPE_EXIT({ + modifying_query 
= false; + }); shutdown(); auto inner_query = initInnerQuery(new_select_query->as(), local_context); From 775b3b6a055b338c632867912738f1f192f6dd2c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 15:51:01 +0300 Subject: [PATCH 280/615] Update format-changelog.py --- utils/changelog-simple/format-changelog.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py index 2a407c23965..d5e1518270e 100755 --- a/utils/changelog-simple/format-changelog.py +++ b/utils/changelog-simple/format-changelog.py @@ -51,9 +51,7 @@ def parse_one_pull_request(item): category = re.sub(r"^[-*\s]*", "", lines[i]) i += 1 - elif re.match( - r"(?i).*change\s*log\s*entry", lines[i] - ): + elif re.match(r"(?i).*change\s*log\s*entry", lines[i]): i += 1 # Can have one empty line between header and the entry itself. Filter it out. if i < len(lines) and not lines[i]: @@ -67,7 +65,6 @@ def parse_one_pull_request(item): else: i += 1 - if not category: # Shouldn't happen, because description check in CI should catch such PRs. # Fall through, so that it shows up in output and the user can fix it. 
From b2c7ba7cbe0e50a8b742ad09b4cdb45712f7f82c Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 19 May 2022 12:54:41 +0000 Subject: [PATCH 281/615] update --- src/Storages/WindowView/StorageWindowView.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 35040c0f10a..c03caed3d42 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -472,6 +472,7 @@ void StorageWindowView::alter( SCOPE_EXIT({ modifying_query = false; }); + shutdown(); auto inner_query = initInnerQuery(new_select_query->as(), local_context); @@ -503,7 +504,6 @@ void StorageWindowView::alter( setInMemoryMetadata(new_metadata); startup(); - modifying_query = false; } void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const @@ -1358,6 +1358,7 @@ void StorageWindowView::writeIntoWindowView( { while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); + if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); From 56ec467ba76d5ea562090288c127001b38e45fa6 Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 19 May 2022 13:04:15 +0000 Subject: [PATCH 282/615] update tests --- tests/queries/0_stateless/01048_window_view_parser.sql | 4 ++-- tests/queries/0_stateless/01082_window_view_watch_limit.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index 05e11cb4326..95190ddafa1 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -56,7 +56,7 @@ SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---POPULATE JOIN---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE 
WINDOW VIEW test_01048.wv POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; @@ -106,7 +106,7 @@ SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---POPULATE JOIN---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE test_01048.wv; diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 0ae9e9d7309..b31c9ee8167 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -38,7 +38,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01082_window_view_watch_limit.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01082_window_view_watch_limit.mt GROUP BY hop(timestamp, INTERVAL '2' 
SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01082_window_view_watch_limit.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01082_window_view_watch_limit.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect("Ok.") client1.expect(prompt) From 4290cbc81800265c982bba3d12ac02d84b7b0f2b Mon Sep 17 00:00:00 2001 From: Vxider Date: Thu, 19 May 2022 13:05:05 +0000 Subject: [PATCH 283/615] update create windowview syntax --- src/Parsers/ParserCreateQuery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 57a4b5ae7ba..6a34e1d2700 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -419,7 +419,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [INNER ENGINE [db.]name] [ENGINE [db.]name] [WATERMARK function] [ALLOWED_LATENESS = interval_function] AS SELECT ... +/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [INNER ENGINE [db.]name] [ENGINE [db.]name] [WATERMARK function] [ALLOWED_LATENESS = interval_function] [POPULATE] AS SELECT ... class ParserCreateWindowViewQuery : public IParserBase { protected: From 9acb42fcdbdcaf0f9e03859140d26b3bc1506cbf Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 15:23:08 +0200 Subject: [PATCH 284/615] Add time to wait for workflow canceled --- tests/ci/cancel_and_rerun_workflow_lambda/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 72fbe530bae..813ee9d1ab7 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -329,7 +329,7 @@ def main(event): exec_workflow_url([most_recent_workflow.cancel_url], token) print("Cancelled") - for _ in range(30): + for _ in range(45): latest_workflow_desc = get_workflow_description(most_recent_workflow.run_id) print("Checking latest workflow", latest_workflow_desc) if latest_workflow_desc.status in ("completed", "cancelled"): From 86d48e1c99f19acc773d999768329a3b20d0df20 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 19 May 2022 14:10:04 +0000 Subject: [PATCH 285/615] Disable WITH ROLLUP/CUBE for GROUPING SETS --- src/Interpreters/InterpreterSelectQuery.cpp | 5 +++- ...4_grouping_sets_with_rollup_cube.reference | 0 .../02304_grouping_sets_with_rollup_cube.sql | 23 +++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.reference create mode 100644 tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5eaeecb9373..1b34759e9dd 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1093,7 +1093,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

desc->type == ProjectionDescription::Type::Aggregate) { diff --git a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.reference b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql new file mode 100644 index 00000000000..25263edc980 --- /dev/null +++ b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql @@ -0,0 +1,23 @@ +SELECT + number +FROM + numbers(10) +GROUP BY + GROUPING SETS + ( + number, + number % 2 + ) + WITH ROLLUP; -- { serverError NOT_IMPLEMENTED } + +SELECT + number +FROM + numbers(10) +GROUP BY + GROUPING SETS + ( + number, + number % 2 + ) + WITH CUBE; -- { serverError NOT_IMPLEMENTED } From 3c95d0830824b0f524a01a47552b02181dad27d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 16:36:23 +0200 Subject: [PATCH 286/615] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6babf03c7f..ebcf2c83183 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,7 +45,7 @@ * Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)). * Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance or ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Improve performance of ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). * Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)). * Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)). From b35146a2e7715b6abe0f5ecc450768e8152b94e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 16:37:36 +0200 Subject: [PATCH 287/615] Fix typo --- programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4e4e0cc07f5..229df1347f6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -69,7 +69,7 @@ void Client::processError(const String & query) const { if (server_exception) { - fmt::print(stderr, "Received exception from server (version {}):\n{}\n", + fmt::print(stderr, "Received exception from the server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); if (is_interactive) From aaaccdba342ceb1b6ec567c0f97e14a4e85d97bd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 16:56:29 +0200 Subject: [PATCH 288/615] Update changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index ebcf2c83183..9eec13c884c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,8 +21,6 @@ * Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)). * Add support of GROUPING SETS in GROUP BY clause. This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)). * Added `system.certificates` table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Implemented L1, L2, Linf, Cosine distance functions for arrays and L1, L2, Linf norm functions for arrays. - [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). * Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)). * New single binary based diagnostics tool (clickhouse-diagnostics). [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). * Add output format `Prometheus` [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). @@ -33,6 +31,8 @@ #### Experimental Feature +* Implemented L1, L2, Linf, Cosine distance functions for arrays and L1, L2, Linf norm functions for arrays. + [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). Caveat: the functions will be renamed. * Improve the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation(ctrl-c) faster, by checking `isCancelled()` more frequently. 
[#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)). * Introspection for remove filesystem cache. [#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)). * Added new hash function `wyHash64` for SQL. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)). From a609287761252ae63d2e867dd48f21365fe61c3e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 19 May 2022 17:39:53 +0200 Subject: [PATCH 289/615] Add a comment about disabling further checks --- .clang-tidy | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index 70ce24aa731..5e8743f934b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,3 +1,6 @@ +# Enable all checks + disable selected checks. Feel free to remove disabled checks from below list if +# a) the new check is not controversial (this includes many checks in readability-* and google-*) or +# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*). 
Checks: '*, -abseil-*, From ca702b459371b2ab6efc6e189beb46a03b002432 Mon Sep 17 00:00:00 2001 From: ni1l <94829417+ni1l@users.noreply.github.com> Date: Thu, 19 May 2022 23:52:08 +0800 Subject: [PATCH 290/615] Update the Chinese version of encoding-functions.md --- .../functions/encoding-functions.md | 353 +++++++++++++++++- 1 file changed, 349 insertions(+), 4 deletions(-) diff --git a/docs/zh/sql-reference/functions/encoding-functions.md b/docs/zh/sql-reference/functions/encoding-functions.md index f1152965d2d..b9a3cbf0550 100644 --- a/docs/zh/sql-reference/functions/encoding-functions.md +++ b/docs/zh/sql-reference/functions/encoding-functions.md @@ -68,12 +68,306 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello; ## hex {#hex} -接受`String`,`unsigned integer`,`Date`或`DateTime`类型的参数。返回包含参数的十六进制表示的字符串。使用大写字母`A-F`。不使用`0x`前缀或`h`后缀。对于字符串,所有字节都简单地编码为两个十六进制数字。数字转换为大端(«易阅读»)格式。对于数字,去除其中较旧的零,但仅限整个字节。例如,`hex(1)='01'`。 `Date`被编码为自Unix时间开始以来的天数。 `DateTime`编码为自Unix时间开始以来的秒数。 +返回包含参数的十六进制表示的字符串。 -## unhex(str) {#unhexstr} +别名为: `HEX`。 -接受包含任意数量的十六进制数字的字符串,并返回包含相应字节的字符串。支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数,则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。 -如果要将结果转换为数字,可以使用«reverse»和«reinterpretAsType»函数。 +**语法** + +``` sql +hex(arg) +``` + +该函数使用大写字母`A-F`,不使用任何前缀(如`0x`)或后缀(如`h`) + +对于整数参数,它从高到低(大端或“人类可读”顺序)打印十六进制数字(“半字节”)。它从左侧第一个非零字节开始(省略前导零字节),但即使前导数字为零,也始终打印每个字节的两个数字。 + +类型为[Date](../../sql-reference/data-types/date.md)和[DateTime](../../sql-reference/data-types/datetime.md)的值将被格式化为相应的整数(日期为 Epoch 以来的天数,DateTime 为 Unix Timestamp 的值)。 + +对于[String](../../sql-reference/data-types/string.md)和[FixedString](../../sql-reference/data-types/fixedstring.md),所有字节都被简单地编码为两个十六进制数字。零字节不会被省略。 + +类型为[Float](../../sql-reference/data-types/float.md)和[Decimal](../../sql-reference/data-types/decimal.md)的值被编码为它们在内存中的表示。由于我们支持小端架构,它们以小端编码。零前导尾随字节不会被省略。 + +类型为[UUID](../data-types/uuid.md)的值被编码为大端顺序字符串。 + +**参数** + +- `arg` — 
要转换为十六进制的值。类型为[String](../../sql-reference/data-types/string.md),[UInt](../../sql-reference/data-types/int-uint.md),[Float](../../sql-reference/data-types/float.md),[Decimal](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md)或者[DateTime](../../sql-reference/data-types/datetime.md)。 + +**返回值** + +- 具有参数的十六进制表示的字符串。 + +类型为:[String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: + +``` sql +SELECT hex(1); +``` + +结果: + +``` text +01 +``` + +查询语句: + +``` sql +SELECT hex(toFloat32(number)) AS hex_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─hex_presentation─┐ +│ 00007041 │ +│ 00008041 │ +└──────────────────┘ +``` + +查询语句: + +``` sql +SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─hex_presentation─┐ +│ 0000000000002E40 │ +│ 0000000000003040 │ +└──────────────────┘ +``` + +查询语句: + +``` sql +SELECT lower(hex(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'))) as uuid_hex +``` + +结果: + +``` text +┌─uuid_hex─────────────────────────┐ +│ 61f0c4045cb311e7907ba6006ad3dba0 │ +└──────────────────────────────────┘ +``` + +## unhex {#unhexstr} + +执行[hex](#hex)函数的相反操作。它将每对十六进制数字(在参数中)解释为一个数字,并将其转换为该数字表示的字节。返回值是一个二进制字符串 (BLOB)。 + +如果要将结果转换为数字,可以使用 [reverse](../../sql-reference/functions/string-functions.md#reverse) 和 [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) 函数。 + +:::note +如果从 `clickhouse-client` 中调用 `unhex`,二进制字符串将使用 UTF-8 显示。 +::: + +别名为:`UNHEX`。 + +**语法** + +``` sql +unhex(arg) +``` + +**参数** + +- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md)。 + +支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数,则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。对于数字参数, unhex()不执行 hex(N) 的逆运算。 + +**返回值** + +- 二进制字符串 (BLOB)。 + +类型为: [String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: +``` sql +SELECT unhex('303132'), UNHEX('4D7953514C'); +``` + +结果: +``` text 
+┌─unhex('303132')─┬─unhex('4D7953514C')─┐ +│ 012 │ MySQL │ +└─────────────────┴─────────────────────┘ +``` + +查询语句: + +``` sql +SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num; +``` + +结果: + +``` text +┌──num─┐ +│ 4095 │ +└──────┘ +``` + +## bin {#bin} + +返回一个包含参数二进制表示的字符串。 + +**语法** + +``` sql +bin(arg) +``` + +别名为: `BIN`。 + +对于整数参数,它从最高有效到最低有效(大端或“人类可读”顺序)打印 bin 数字。它从最重要的非零字节开始(省略前导零字节),但如果前导数字为零,则始终打印每个字节的八位数字。 + +类型为[Date](../../sql-reference/data-types/date.md)和[DateTime](../../sql-reference/data-types/datetime.md)的值被格式化为相应的整数(`Date` 为 Epoch 以来的天数,`DateTime` 为 Unix Timestamp 的值)。 + +对于[String](../../sql-reference/data-types/string.md)和[FixedString](../../sql-reference/data-types/fixedstring.md),所有字节都被简单地编码为八个二进制数。零字节不会被省略。 + +类型为[Float](../../sql-reference/data-types/float.md)和[Decimal](../../sql-reference/data-types/decimal.md)的值被编码为它们在内存中的表示。由于我们支持小端架构,它们以小端编码。零前导尾随字节不会被省略。 + +类型为[UUID](../data-types/uuid.md)的值被编码为大端顺序字符串。 + +**参数** + +- `arg` — 要转换为二进制的值。类型为[String](../../sql-reference/data-types/string.md),[FixedString](../../sql-reference/data-types/fixedstring.md),[UInt](../../sql-reference/data-types/int-uint.md),[Float](../../sql-reference/data-types/float.md),[Decimal](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md)或者[DateTime](../../sql-reference/data-types/datetime.md)。 + +**返回值** + +- 具有参数的二进制表示的字符串。 + +类型为: [String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: + +``` sql +SELECT bin(14); +``` + +结果: + +``` text +┌─bin(14)──┐ +│ 00001110 │ +└──────────┘ +``` + +查询语句: + +``` sql +SELECT bin(toFloat32(number)) AS bin_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─bin_presentation─────────────────┐ +│ 00000000000000000111000001000001 │ +│ 00000000000000001000000001000001 │ +└──────────────────────────────────┘ +``` + +查询语句: + +``` sql +SELECT bin(toFloat64(number)) AS bin_presentation FROM numbers(15, 2); +``` + +结果: + +``` text 
+┌─bin_presentation─────────────────────────────────────────────────┐ +│ 0000000000000000000000000000000000000000000000000010111001000000 │ +│ 0000000000000000000000000000000000000000000000000011000001000000 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +查询语句: + +``` sql +SELECT bin(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')) as bin_uuid +``` + +结果: + +``` text +┌─bin_uuid─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 01100001111100001100010000000100010111001011001100010001111001111001000001111011101001100000000001101010110100111101101110100000 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + + +## unbin {#unbinstr} + +将每对二进制数字(在参数中)解释为一个数字,并将其转换为该数字表示的字节。这些函数执行与 [bin](#bin) 相反的操作。 + +**语法** + +``` sql +unbin(arg) +``` + +别名为: `UNBIN`。 + +对于数字参数,`unbin()` 不会返回 `bin()` 的倒数。如果要将结果转换为数字,可以使用[reverse](../../sql-reference/functions/string-functions.md#reverse) 和 [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) 函数。 + +:::note +如果从 `clickhouse-client` 中调用 `unbin`,则使用 UTF-8 显示二进制字符串。 +::: + +支持二进制数字`0`和`1`。二进制位数不必是八的倍数。如果参数字符串包含二进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。 + +**参数** + +- `arg` — 包含任意数量的二进制数字的字符串。类型为[String](../../sql-reference/data-types/string.md)。 + +**返回值** + +- 二进制字符串 (BLOB)。 + +类型为:[String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: + +``` sql +SELECT UNBIN('001100000011000100110010'), UNBIN('0100110101111001010100110101000101001100'); +``` + +结果: + +``` text +┌─unbin('001100000011000100110010')─┬─unbin('0100110101111001010100110101000101001100')─┐ +│ 012 │ MySQL │ +└───────────────────────────────────┴───────────────────────────────────────────────────┘ +``` + +查询语句: + +``` sql +SELECT reinterpretAsUInt64(reverse(unbin('1110'))) AS num; +``` + +结果: + +``` text +┌─num─┐ +│ 14 │ 
+└─────┘ +``` ## UUIDStringToNum(str) {#uuidstringtonumstr} @@ -91,4 +385,55 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello; 接受一个整数。返回一个UInt64类型数组,其中包含一组2的幂列表,其列表中的所有值相加等于这个整数。数组中的数字按升序排列。 +## bitPositionsToArray(num) {#bitpositionstoarraynum} + +接受整数并将其转换为无符号整数。返回一个 `UInt64` 数字数组,其中包含 `arg` 中等于 `1` 的位的位置列表,按升序排列。 + +**语法** + +```sql +bitPositionsToArray(arg) +``` + +**参数** + +- `arg` — 整数值。类型为[Int/UInt](../../sql-reference/data-types/int-uint.md)。 + +**返回值** + +- 包含等于 `1` 的位位置列表的数组,按升序排列。 + +类型为: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))。 + +**示例** + +查询语句: + +``` sql +SELECT bitPositionsToArray(toInt8(1)) AS bit_positions; +``` + +结果: + +``` text +┌─bit_positions─┐ +│ [0] │ +└───────────────┘ +``` + +查询语句: + +``` sql +SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions; +``` + +结果: + +``` text +┌─bit_positions─────┐ +│ [0,1,2,3,4,5,6,7] │ +└───────────────────┘ +``` + + [来源文章](https://clickhouse.com/docs/en/query_language/functions/encoding_functions/) From 6a9d36a9e272b112d3128219cfb6574d992809c6 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 17:53:00 +0200 Subject: [PATCH 291/615] Do not fail CI on events DB down, improve logging --- tests/ci/clickhouse_helper.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d52b6262a78..d3a7787dd96 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -7,6 +7,10 @@ import requests # type: ignore from get_robot_token import get_parameter_from_ssm +class InsertException(Exception): + pass + + class ClickHouseHelper: def __init__(self, url=None): if url is None: @@ -58,23 +62,37 @@ class ClickHouseHelper: response.request.body, ) - raise Exception(error) + raise InsertException(error) else: - raise Exception(error) + raise InsertException(error) def _insert_json_str_info(self, db, table, json_str): self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) - def insert_event_into(self, db, table, event): + def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) - self._insert_json_str_info(db, table, event_str) + try: + self._insert_json_str_info(db, table, event_str) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise - def insert_events_into(self, db, table, events): + def insert_events_into(self, db, table, events, safe=True): jsons = [] for event in events: jsons.append(json.dumps(event)) - self._insert_json_str_info(db, table, ",".join(jsons)) + try: + self._insert_json_str_info(db, table, ",".join(jsons)) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise def _select_and_get_json_each_row(self, db, query): params = { @@ -96,7 +114,7 @@ class ClickHouseHelper: logging.warning("Reponse text %s", response.text) time.sleep(0.1 * i) - raise Exception("Cannot 
insert data into clickhouse") + raise Exception("Cannot fetch data from clickhouse") def select_json_each_row(self, db, query): text = self._select_and_get_json_each_row(db, query) @@ -187,4 +205,4 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results): if test_result[1] == "FAIL" and test_result[0] in master_failed_tests: test_result[1] = "FLAKY" except Exception as ex: - logging.info("Exception happened during flaky tests fetch %s", ex) + logging.error("Exception happened during flaky tests fetch %s", ex) From 338f63855743fbf2bc35d3c188d05c84cbd487d9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 19 May 2022 17:54:56 +0200 Subject: [PATCH 292/615] Fix linter errors --- tests/ci/clickhouse_helper.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d3a7787dd96..b6e49dccea0 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -26,7 +26,7 @@ class ClickHouseHelper: def _insert_json_str_info_impl(url, auth, db, table, json_str): params = { "database": db, - "query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table), + "query": f"INSERT INTO {table} FORMAT JSONEachRow", "date_time_input_format": "best_effort", "send_logs_level": "warning", } @@ -185,17 +185,14 @@ def prepare_tests_results_for_clickhouse( def mark_flaky_tests(clickhouse_helper, check_name, test_results): try: - query = """ - SELECT DISTINCT test_name - FROM checks - WHERE - check_start_time BETWEEN now() - INTERVAL 3 DAY AND now() - AND check_name = '{check_name}' - AND (test_status = 'FAIL' OR test_status = 'FLAKY') - AND pull_request_number = 0 - """.format( - check_name=check_name - ) + query = f"""SELECT DISTINCT test_name +FROM checks +WHERE + check_start_time BETWEEN now() - INTERVAL 3 DAY AND now() + AND check_name = '{check_name}' + AND (test_status = 'FAIL' OR test_status = 'FLAKY') + AND pull_request_number = 0 +""" 
tests_data = clickhouse_helper.select_json_each_row("default", query) master_failed_tests = {row["test_name"] for row in tests_data} From 8a74b1cf2f8e84f7eabdc1e6f698fd7186cf2ec7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 19 May 2022 17:56:35 +0200 Subject: [PATCH 293/615] Add forgotten insert_events_into to ast-fuzzer CI --- tests/ci/ast_fuzzer_check.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 94f5eff51d7..9ccae89b403 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -169,6 +169,8 @@ if __name__ == "__main__": check_name, ) + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + logging.info("Result: '%s', '%s', '%s'", status, description, report_url) print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) From 7134fc848fb4714651b124e3f2a6ede215b49062 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 18:23:20 +0200 Subject: [PATCH 294/615] Use SSL certs verification for CI DB --- tests/ci/clickhouse_helper.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index b6e49dccea0..c595dc559df 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -32,9 +32,7 @@ class ClickHouseHelper: } for i in range(5): - response = requests.post( - url, params=params, data=json_str, headers=auth, verify=False - ) + response = requests.post(url, params=params, data=json_str, headers=auth) logging.info("Response content '%s'", response.content) @@ -103,9 +101,7 @@ class ClickHouseHelper: for i in range(5): response = None try: - response = requests.get( - self.url, params=params, headers=self.auth, verify=False - ) + response = requests.get(self.url, params=params, headers=self.auth) response.raise_for_status() return response.text except Exception as ex: From c87c3fcfd93225589d95bc14740c1f4aba490297 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 May 2022 19:25:45 +0200 Subject: [PATCH 295/615] Move Azure blob storage --- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 30 ++- .../AzureBlobStorage/AzureBlobStorageAuth.h | 5 +- .../AzureBlobStorage/DiskAzureBlobStorage.cpp | 168 -------------- .../AzureBlobStorage/DiskAzureBlobStorage.h | 86 ------- .../registerDiskAzureBlobStorage.cpp | 53 +++-- src/Disks/AzureObjectStorage.cpp | 213 ++++++++++++++++++ src/Disks/AzureObjectStorage.h | 113 ++++++++++ src/Disks/DiskObjectStorage.cpp | 2 +- src/Disks/DiskObjectStorage.h | 2 +- src/Disks/DiskRestartProxy.cpp | 4 +- src/Disks/HDFSObjectStorage.cpp | 7 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 4 +- src/Disks/S3/registerDiskS3.cpp | 1 - src/IO/ReadBufferFromAzureBlobStorage.cpp | 2 +- src/IO/ReadBufferFromAzureBlobStorage.h | 6 +- src/IO/WriteBufferFromAzureBlobStorage.cpp | 25 +- src/IO/WriteBufferFromAzureBlobStorage.h | 10 +- 17 
files changed, 419 insertions(+), 312 deletions(-) delete mode 100644 src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp delete mode 100644 src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h create mode 100644 src/Disks/AzureObjectStorage.cpp create mode 100644 src/Disks/AzureObjectStorage.h diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp index 94553ba04e9..c078f584a09 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -66,27 +66,27 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr template -std::shared_ptr getClientWithConnectionString(const String & connection_str, const String & container_name) = delete; +std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name) = delete; template<> -std::shared_ptr getClientWithConnectionString( +std::unique_ptr getClientWithConnectionString( const String & connection_str, const String & /*container_name*/) { - return std::make_shared(BlobServiceClient::CreateFromConnectionString(connection_str)); + return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str)); } template<> -std::shared_ptr getClientWithConnectionString( +std::unique_ptr getClientWithConnectionString( const String & connection_str, const String & container_name) { - return std::make_shared(BlobContainerClient::CreateFromConnectionString(connection_str, container_name)); + return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name)); } template -std::shared_ptr getAzureBlobStorageClientWithAuth( +std::unique_ptr getAzureBlobStorageClientWithAuth( const String & url, const String & container_name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { if (config.has(config_prefix + ".connection_string")) @@ -101,15 +101,15 @@ std::shared_ptr 
getAzureBlobStorageClientWithAuth( config.getString(config_prefix + ".account_name"), config.getString(config_prefix + ".account_key") ); - return std::make_shared(url, storage_shared_key_credential); + return std::make_unique(url, storage_shared_key_credential); } auto managed_identity_credential = std::make_shared(); - return std::make_shared(url, managed_identity_credential); + return std::make_unique(url, managed_identity_credential); } -std::shared_ptr getAzureBlobContainerClient( +std::unique_ptr getAzureBlobContainerClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); @@ -136,10 +136,20 @@ std::shared_ptr getAzureBlobContainerClient( } } - return std::make_shared( + return std::make_unique( blob_service_client->CreateBlobContainer(container_name).Value); } +std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) +{ + return std::make_unique( + config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".max_single_read_retries", 3), + config.getInt(config_prefix + ".max_single_download_retries", 3) + ); +} + } #endif diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 048daa7c9dc..32d3ca9945a 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -6,13 +6,16 @@ #include #include +#include namespace DB { -std::shared_ptr getAzureBlobContainerClient( +std::unique_ptr getAzureBlobContainerClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, 
ContextPtr /*context*/); + } #endif diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp deleted file mode 100644 index 556c28bd3f4..00000000000 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int AZURE_BLOB_STORAGE_ERROR; -} - - -DiskAzureBlobStorageSettings::DiskAzureBlobStorageSettings( - UInt64 max_single_part_upload_size_, - UInt64 min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int thread_pool_size_) : - max_single_part_upload_size(max_single_part_upload_size_), - min_bytes_for_seek(min_bytes_for_seek_), - max_single_read_retries(max_single_read_retries_), - max_single_download_retries(max_single_download_retries_), - thread_pool_size(thread_pool_size_) {} - - -DiskAzureBlobStorage::DiskAzureBlobStorage( - const String & name_, - DiskPtr metadata_disk_, - std::shared_ptr blob_container_client_, - SettingsPtr settings_, - GetDiskSettings settings_getter_) : - IDiskRemote(name_, "", metadata_disk_, nullptr, "DiskAzureBlobStorage", settings_->thread_pool_size), - blob_container_client(blob_container_client_), - current_settings(std::move(settings_)), - settings_getter(settings_getter_) {} - - -std::unique_ptr DiskAzureBlobStorage::readFile( - const String & path, - const ReadSettings & read_settings, - std::optional, - std::optional) const -{ - auto settings = current_settings.get(); - auto metadata = readMetadata(path); - - LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); - - auto reader_impl = std::make_unique( - blob_container_client, metadata.remote_fs_root_path, metadata.remote_fs_objects, - settings->max_single_read_retries, settings->max_single_download_retries, read_settings); - - if 
(read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - auto reader = getThreadPoolReader(); - return std::make_unique(reader, read_settings, std::move(reader_impl)); - } - else - { - auto buf = std::make_unique(std::move(reader_impl)); - return std::make_unique(std::move(buf), current_settings.get()->min_bytes_for_seek); - } -} - - -std::unique_ptr DiskAzureBlobStorage::writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings &) -{ - auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name - - LOG_TRACE(log, "{} to file by path: {}. AzureBlob Storage path: {}", - mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), blob_path); - - auto buffer = std::make_unique( - blob_container_client, - blob_path, - current_settings.get()->max_single_part_upload_size, - buf_size); - - auto create_metadata_callback = [this, path, mode, blob_path] (size_t count) - { - readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_path, count] (Metadata & metadata) { metadata.addObject(blob_path, count); return true; }); - }; - - return std::make_unique(std::move(buffer), std::move(create_metadata_callback), blob_path); -} - - -DiskType DiskAzureBlobStorage::getType() const -{ - return DiskType::AzureBlobStorage; -} - - -bool DiskAzureBlobStorage::isRemote() const -{ - return true; -} - - -bool DiskAzureBlobStorage::supportZeroCopyReplication() const -{ - return true; -} - - -bool DiskAzureBlobStorage::checkUniqueId(const String & id) const -{ - Azure::Storage::Blobs::ListBlobsOptions blobs_list_options; - blobs_list_options.Prefix = id; - blobs_list_options.PageSizeHint = 1; - - auto blobs_list_response = blob_container_client->ListBlobs(blobs_list_options); - auto blobs_list = blobs_list_response.Blobs; - - for (const auto & blob : blobs_list) - { - if (id == blob.Name) - return true; - } - - return false; -} - - -void 
DiskAzureBlobStorage::removeFromRemoteFS(const std::vector & paths) -{ - for (const auto & path : paths) - { - try - { - auto delete_info = blob_container_client->DeleteBlob(path); - if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); - } - catch (const Azure::Storage::StorageException & e) - { - LOG_INFO(log, "Caught an error while deleting file {} : {}", path, e.Message); - throw; - } - } -} - -void DiskAzureBlobStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) -{ - auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context); - - current_settings.set(std::move(new_settings)); - - if (AsyncExecutor * exec = dynamic_cast(&getExecutor())) - exec->setMaxThreads(current_settings.get()->thread_pool_size); -} - -} - -#endif diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h deleted file mode 100644 index ff99e246d31..00000000000 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -struct DiskAzureBlobStorageSettings final -{ - DiskAzureBlobStorageSettings( - UInt64 max_single_part_upload_size_, - UInt64 min_bytes_for_seek_, - int max_single_read_retries, - int max_single_download_retries, - int thread_pool_size_); - - size_t max_single_part_upload_size; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - UInt64 min_bytes_for_seek; - size_t max_single_read_retries; - size_t max_single_download_retries; - size_t thread_pool_size; -}; - - -class DiskAzureBlobStorage final : public IDiskRemote -{ -public: - - using SettingsPtr = std::unique_ptr; - using GetDiskSettings = 
std::function; - - DiskAzureBlobStorage( - const String & name_, - DiskPtr metadata_disk_, - std::shared_ptr blob_container_client_, - SettingsPtr settings_, - GetDiskSettings settings_getter_); - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) override; - - DiskType getType() const override; - - bool isRemote() const override; - - bool supportZeroCopyReplication() const override; - - bool checkUniqueId(const String & id) const override; - - void removeFromRemoteFS(const std::vector & paths) override; - - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; - -private: - - /// client used to access the files in the Blob Storage cloud - std::shared_ptr blob_container_client; - - MultiVersion current_settings; - /// Gets disk settings from context. 
- GetDiskSettings settings_getter; -}; - -} - -#endif diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 56df793783e..e111406a587 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -7,9 +7,9 @@ #include #include #include -#include #include - +#include +#include namespace DB { @@ -26,14 +26,12 @@ constexpr char test_file[] = "test.txt"; constexpr char test_str[] = "test"; constexpr size_t test_str_size = 4; - void checkWriteAccess(IDisk & disk) { auto file = disk.writeFile(test_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); file->write(test_str, test_str_size); } - void checkReadAccess(IDisk & disk) { auto file = disk.readFile(test_file); @@ -43,7 +41,6 @@ void checkReadAccess(IDisk & disk) throw Exception("No read access to disk", ErrorCodes::PATH_ACCESS_DENIED); } - void checkReadWithOffset(IDisk & disk) { auto file = disk.readFile(test_file); @@ -56,24 +53,11 @@ void checkReadWithOffset(IDisk & disk) throw Exception("Failed to read file with offset", ErrorCodes::PATH_ACCESS_DENIED); } - void checkRemoveAccess(IDisk & disk) { disk.removeFile(test_file); } - -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) -{ - return std::make_unique( - config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".max_single_read_retries", 3), - config.getInt(config_prefix + ".max_single_download_retries", 3), - config.getInt(config_prefix + ".thread_pool_size", 16) - ); -} - } void registerDiskAzureBlobStorage(DiskFactory & factory) @@ -87,12 +71,27 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) { auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, 
config_prefix, context); - std::shared_ptr azure_blob_storage_disk = std::make_shared( + FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); + + ObjectStoragePtr azure_object_storage = std::make_unique( + std::move(cache), name, - metadata_disk, getAzureBlobContainerClient(config, config_prefix), - getSettings(config, config_prefix, context), - getSettings + getAzureBlobStorageSettings(config, config_prefix, context)); + + + uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); + bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); + + std::shared_ptr azure_blob_storage_disk = std::make_shared( + name, + /* no namespaces */"", + "DiskAzureBlobStorage", + metadata_disk, + std::move(azure_object_storage), + DiskType::AzureBlobStorage, + send_metadata, + copy_thread_pool_size ); if (!config.getBool(config_prefix + ".skip_access_check", false)) @@ -103,9 +102,17 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) checkRemoveAccess(*azure_blob_storage_disk); } +#ifdef NDEBUG + bool use_cache = true; +#else + /// Current cache implementation lead to allocations in destructor of + /// read buffer. 
+ bool use_cache = false; +#endif + azure_blob_storage_disk->startup(context); - if (config.getBool(config_prefix + ".cache_enabled", true)) + if (config.getBool(config_prefix + ".cache_enabled", use_cache)) { String cache_path = config.getString(config_prefix + ".cache_path", context->getPath() + "disks/" + name + "/cache/"); azure_blob_storage_disk = wrapWithCache(azure_blob_storage_disk, "azure-blob-storage-cache", cache_path, metadata_path); diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/AzureObjectStorage.cpp new file mode 100644 index 00000000000..68f7f63638a --- /dev/null +++ b/src/Disks/AzureObjectStorage.cpp @@ -0,0 +1,213 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int AZURE_BLOB_STORAGE_ERROR; + extern const int UNSUPPORTED_METHOD; +} + + +AzureObjectStorage::AzureObjectStorage( + FileCachePtr && cache_, + const String & name_, + AzureClientPtr && client_, + SettingsPtr && settings_) + : IObjectStorage(std::move(cache_)) + , name(name_) + , client(std::move(client_)) + , settings(std::move(settings_)) +{ +} + +bool AzureObjectStorage::exists(const std::string & uri) const +{ + auto client_ptr = client.get(); + + /// What a shame, no Exists method... 
+ Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = uri; + options.PageSizeHint = 1; + + auto blobs_list_response = client_ptr->ListBlobs(options); + auto blobs_list = blobs_list_response.Blobs; + + for (const auto & blob : blobs_list) + { + if (uri == blob.Name) + return true; + } + + return false; +} + +std::unique_ptr AzureObjectStorage::readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto settings_ptr = settings.get(); + + return std::make_unique( + client.get(), path, settings_ptr->max_single_read_retries, + settings_ptr->max_single_download_retries, read_settings.remote_fs_buffer_size); +} + +std::unique_ptr AzureObjectStorage::readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings, + std::optional, + std::optional) const +{ + auto settings_ptr = settings.get(); + auto reader_impl = std::make_unique( + client.get(), common_path_prefix, blobs_to_read, + settings_ptr->max_single_read_retries, settings_ptr->max_single_download_retries, read_settings); + + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + { + auto reader = getThreadPoolReader(); + return std::make_unique(reader, read_settings, std::move(reader_impl)); + } + else + { + auto buf = std::make_unique(std::move(reader_impl)); + return std::make_unique(std::move(buf), settings_ptr->min_bytes_for_seek); + } +} + +/// Open the file for write and return WriteBufferFromFileBase object. 
+std::unique_ptr AzureObjectStorage::writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional, + FinalizeCallback && finalize_callback, + size_t buf_size, + const WriteSettings &) +{ + if (mode != WriteMode::Rewrite) + throw Exception("Azure storage doesn't support append", ErrorCodes::UNSUPPORTED_METHOD); + + auto buffer = std::make_unique( + client.get(), + path, + settings.get()->max_single_part_upload_size, + buf_size); + + return std::make_unique(std::move(buffer), std::move(finalize_callback), path); +} + +void AzureObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +{ + auto client_ptr = client.get(); + + Azure::Storage::Blobs::ListBlobsOptions blobs_list_options; + blobs_list_options.Prefix = path; + + auto blobs_list_response = client_ptr->ListBlobs(blobs_list_options); + auto blobs_list = blobs_list_response.Blobs; + + for (const auto & blob : blobs_list) + children.emplace_back(blob.Name, blob.BlobSize); +} + +/// Remove file. Throws exception if file doesn't exists or it's a directory. 
+void AzureObjectStorage::removeObject(const std::string & path) +{ + auto client_ptr = client.get(); + auto delete_info = client_ptr->DeleteBlob(path); + if (!delete_info.Value.Deleted) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); +} + +void AzureObjectStorage::removeObjects(const std::vector & paths) +{ + auto client_ptr = client.get(); + for (const auto & path : paths) + { + auto delete_info = client_ptr->DeleteBlob(path); + if (!delete_info.Value.Deleted) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); + } +} + +void AzureObjectStorage::removeObjectIfExists(const std::string & path) +{ + auto client_ptr = client.get(); + auto delete_info = client_ptr->DeleteBlob(path); +} + +void AzureObjectStorage::removeObjectsIfExist(const std::vector & paths) +{ + auto client_ptr = client.get(); + for (const auto & path : paths) + auto delete_info = client_ptr->DeleteBlob(path); +} + + +ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const +{ + auto client_ptr = client.get(); + auto blob_client = client_ptr->GetBlobClient(path); + auto properties = blob_client.GetProperties().Value; + ObjectMetadata result; + result.size_bytes = properties.BlobSize; + if (!properties.Metadata.empty()) + { + result.attributes.emplace(); + for (const auto & [key, value] : properties.Metadata) + (*result.attributes)[key] = value; + } + result.last_modified.emplace(properties.LastModified.time_since_epoch().count()); + return result; +} + +void AzureObjectStorage::copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes) +{ + auto client_ptr = client.get(); + auto dest_blob_client = client_ptr->GetBlobClient(object_to); + auto source_blob_client = client_ptr->GetBlobClient(object_from); + Azure::Storage::Blobs::CopyBlobFromUriOptions copy_options; + if 
(object_to_attributes.has_value()) + { + for (const auto & [key, value] : *object_to_attributes) + copy_options.Metadata[key] = value; + } + + dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); +} + +void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_client = getAzureBlobContainerClient(config, config_prefix); + + client.set(std::move(new_client)); + settings.set(std::move(new_settings)); +} + + +std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + return std::make_unique( + nullptr, + name, + getAzureBlobContainerClient(config, config_prefix), + getAzureBlobStorageSettings(config, config_prefix, context) + ); +} + +} diff --git a/src/Disks/AzureObjectStorage.h b/src/Disks/AzureObjectStorage.h new file mode 100644 index 00000000000..da6393fd55d --- /dev/null +++ b/src/Disks/AzureObjectStorage.h @@ -0,0 +1,113 @@ +#pragma once +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct AzureObjectStorageSettings +{ + AzureObjectStorageSettings( + uint64_t max_single_part_upload_size_, + uint64_t min_bytes_for_seek_, + int max_single_read_retries_, + int max_single_download_retries_) + : max_single_part_upload_size(max_single_part_upload_size_) + , min_bytes_for_seek(min_bytes_for_seek_) + , max_single_read_retries(max_single_read_retries_) + , max_single_download_retries(max_single_download_retries_) + { + } + + size_t max_single_part_upload_size; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + uint64_t min_bytes_for_seek; + size_t max_single_read_retries; + size_t 
max_single_download_retries; +}; + +using AzureClient = Azure::Storage::Blobs::BlobContainerClient; +using AzureClientPtr = std::unique_ptr; + +class AzureObjectStorage : public IObjectStorage +{ +public: + + using SettingsPtr = std::unique_ptr; + + AzureObjectStorage( + FileCachePtr && cache_, + const String & name_, + AzureClientPtr && client_, + SettingsPtr && settings_); + + bool exists(const std::string & uri) const override; + + std::unique_ptr readObject( /// NOLINT + const std::string & path, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + std::unique_ptr readObjects( /// NOLINT + const std::string & common_path_prefix, + const BlobsPathToSize & blobs_to_read, + const ReadSettings & read_settings = ReadSettings{}, + std::optional read_hint = {}, + std::optional file_size = {}) const override; + + /// Open the file for write and return WriteBufferFromFileBase object. + std::unique_ptr writeObject( /// NOLINT + const std::string & path, + WriteMode mode, + std::optional attributes = {}, + FinalizeCallback && finalize_callback = {}, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + const WriteSettings & write_settings = {}) override; + + void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. 
+ void removeObject(const std::string & path) override; + + void removeObjects(const std::vector & paths) override; + + void removeObjectIfExists(const std::string & path) override; + + void removeObjectsIfExist(const std::vector & paths) override; + + ObjectMetadata getObjectMetadata(const std::string & path) const override; + + void copyObject( /// NOLINT + const std::string & object_from, + const std::string & object_to, + std::optional object_to_attributes = {}) override; + + void shutdown() override {} + + void startup() override {} + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + + String getObjectsNamespace() const override { return ""; } + + std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + +private: + const String name; + /// client used to access the files in the Blob Storage cloud + MultiVersion client; + MultiVersion settings; +}; + +} + +#endif diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index bfec350caba..31ae7dc575f 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -875,7 +875,7 @@ void DiskObjectStorageMetadataHelper::findLastRevision() LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); } -int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const +int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) { const std::string path = source_path + SCHEMA_VERSION_OBJECT; int version = 0; diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index f1687fe19b6..76a0191ade5 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -298,7 +298,7 @@ 
public: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; void findLastRevision(); - int readSchemaVersion(IObjectStorage * object_storage, const String & source_path) const; + static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 903caf705c5..b1bba40026a 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -5,8 +5,10 @@ namespace DB { + namespace ErrorCodes -{extern const int DEADLOCK_AVOIDED; +{ + extern const int DEADLOCK_AVOIDED; } using Millis = std::chrono::milliseconds; diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index cbd89bcca88..9e99e7aa820 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -35,7 +35,6 @@ bool HDFSObjectStorage::exists(const std::string & hdfs_uri) const const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); - } std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT @@ -72,9 +71,9 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); /// Single O_WRONLY in libhdfs adds O_TRUNC - auto hdfs_buffer = std::make_unique(path, - config, settings->replication, buf_size, - mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); + auto hdfs_buffer = std::make_unique( + path, config, settings->replication, buf_size, + mode == WriteMode::Rewrite ? 
O_WRONLY : O_WRONLY | O_APPEND); return std::make_unique(std::move(hdfs_buffer), std::move(finalize_callback), path); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 509b73da5d4..ba477ced601 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -146,7 +146,7 @@ class ReadBufferFromAzureBlobStorageGather final : public ReadBufferFromRemoteFS { public: ReadBufferFromAzureBlobStorageGather( - std::shared_ptr blob_container_client_, + std::shared_ptr blob_container_client_, const std::string & common_path_prefix_, const BlobsPathToSize & blobs_to_read_, size_t max_single_read_retries_, @@ -162,7 +162,7 @@ public: SeekableReadBufferPtr createImplementationBufferImpl(const String & path, size_t file_size) override; private: - std::shared_ptr blob_container_client; + std::shared_ptr blob_container_client; size_t max_single_read_retries; size_t max_single_download_retries; }; diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index fda1a1f51b0..5da49be12e4 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -80,7 +80,6 @@ void registerDiskS3(DiskFactory & factory) FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - ObjectStoragePtr s3_storage = std::make_unique( std::move(cache), getClient(config, config_prefix, context), getSettings(config, config_prefix, context), diff --git a/src/IO/ReadBufferFromAzureBlobStorage.cpp b/src/IO/ReadBufferFromAzureBlobStorage.cpp index 41cec694786..2576b10f9ac 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/IO/ReadBufferFromAzureBlobStorage.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + std::shared_ptr blob_container_client_, const String & path_, size_t max_single_read_retries_, size_t 
max_single_download_retries_, diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 80078afd6d0..b7459ccead1 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -17,8 +17,8 @@ class ReadBufferFromAzureBlobStorage : public SeekableReadBuffer, public WithFil { public: - explicit ReadBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + ReadBufferFromAzureBlobStorage( + std::shared_ptr blob_container_client_, const String & path_, size_t max_single_read_retries_, size_t max_single_download_retries_, @@ -41,7 +41,7 @@ private: void initialize(); std::unique_ptr data_stream; - std::shared_ptr blob_container_client; + std::shared_ptr blob_container_client; std::unique_ptr blob_client; const String path; diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp index eef1c8108fa..18e03b08817 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -12,14 +12,18 @@ namespace DB { WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, - size_t buf_size_) : - BufferWithOwnMemory(buf_size_, nullptr, 0), - blob_container_client(blob_container_client_), - max_single_part_upload_size(max_single_part_upload_size_), - blob_path(blob_path_) {} + size_t buf_size_, + std::optional> attributes_) + : BufferWithOwnMemory(buf_size_, nullptr, 0) + , blob_container_client(blob_container_client_) + , max_single_part_upload_size(max_single_part_upload_size_) + , blob_path(blob_path_) + , attributes(attributes_) +{ +} WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() @@ -29,6 +33,15 @@ WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() void WriteBufferFromAzureBlobStorage::finalizeImpl() { + if 
(attributes.has_value()) + { + auto blob_client = blob_container_client->GetBlobClient(blob_path); + Azure::Storage::Metadata metadata; + for (const auto & [key, value] : *attributes) + metadata[key] = value; + blob_client.SetMetadata(metadata); + } + const size_t max_tries = 3; for (size_t i = 0; i < max_tries; ++i) { diff --git a/src/IO/WriteBufferFromAzureBlobStorage.h b/src/IO/WriteBufferFromAzureBlobStorage.h index 75336c497eb..ef13a24abd8 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/IO/WriteBufferFromAzureBlobStorage.h @@ -19,11 +19,12 @@ class WriteBufferFromAzureBlobStorage : public BufferWithOwnMemory { public: - explicit WriteBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + WriteBufferFromAzureBlobStorage( + std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, - size_t buf_size_); + size_t buf_size_, + std::optional> attributes_ = {}); ~WriteBufferFromAzureBlobStorage() override; @@ -32,9 +33,10 @@ public: private: void finalizeImpl() override; - std::shared_ptr blob_container_client; + std::shared_ptr blob_container_client; size_t max_single_part_upload_size; const String blob_path; + std::optional> attributes; }; } From 92c15ec97c09284bc90bb172a6b835b52f9867f5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 May 2022 20:07:15 +0200 Subject: [PATCH 296/615] Get rid of IDiskRemote --- .../AzureBlobStorage/AzureBlobStorageAuth.h | 1 - src/Disks/DiskObjectStorage.cpp | 1 + src/Disks/DiskWebServer.cpp | 9 +- src/Disks/DiskWebServer.h | 5 +- src/Disks/IDiskObjectStorage.h | 8 - src/Disks/IDiskRemote.cpp | 702 ------------------ src/Disks/IDiskRemote.h | 302 -------- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 1 - src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- src/Disks/IO/ReadIndirectBufferFromRemoteFS.h | 1 - src/Disks/IO/ThreadPoolRemoteFSReader.h | 1 - .../IO/WriteIndirectBufferFromRemoteFS.h | 1 - src/Interpreters/Context.cpp | 4 +- 
src/Storages/MergeTree/DataPartsExchange.cpp | 1 - src/Storages/System/StorageSystemDisks.cpp | 1 - 15 files changed, 15 insertions(+), 1025 deletions(-) delete mode 100644 src/Disks/IDiskObjectStorage.h delete mode 100644 src/Disks/IDiskRemote.cpp delete mode 100644 src/Disks/IDiskRemote.h diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 32d3ca9945a..fcd4fd51b49 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -4,7 +4,6 @@ #if USE_AZURE_BLOB_STORAGE -#include #include #include diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 31ae7dc575f..4b0134b2d07 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB { diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 576ded94b01..4f1fc1ad8fb 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -9,8 +9,13 @@ #include #include -#include +#include +#include +#include + #include + + #include #include #include @@ -173,7 +178,7 @@ std::unique_ptr DiskWebServer::readFile(const String & p if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = IDiskRemote::getThreadPoolReader(); + auto reader = IObjectStorage::getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(web_impl), min_bytes_for_seek); } else diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index dd699921f7c..47042fabc3d 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -1,10 +1,13 @@ #pragma once -#include #include #include #include +#include +#include +#include + namespace DB { diff --git a/src/Disks/IDiskObjectStorage.h b/src/Disks/IDiskObjectStorage.h deleted file mode 100644 index 90794301e54..00000000000 --- a/src/Disks/IDiskObjectStorage.h +++ /dev/null @@ 
-1,8 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -} diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp deleted file mode 100644 index d72d7004cb7..00000000000 --- a/src/Disks/IDiskRemote.cpp +++ /dev/null @@ -1,702 +0,0 @@ -#include - -#include "Disks/DiskFactory.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int INCORRECT_DISK_INDEX; - extern const int UNKNOWN_FORMAT; - extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED;; - extern const int FILE_DOESNT_EXIST; - extern const int BAD_FILE_TYPE; -} - - -IDiskRemote::Metadata IDiskRemote::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - return result; -} - - -IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.save(sync); - return result; -} - -IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - return result; -} - -IDiskRemote::Metadata IDiskRemote::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - updater(result); - 
result.save(sync); - return result; -} - -IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - metadata_disk_->removeFile(metadata_file_path_); - - return result; - -} - -IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) -{ - if (overwrite || !metadata_disk_->exists(metadata_file_path_)) - { - return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); - } - else - { - auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - if (result.read_only) - throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); - return result; - } -} - -void IDiskRemote::Metadata::load() -{ - const ReadSettings read_settings; - auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ - - UInt32 version; - readIntText(version, *buf); - - if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT, - "Unknown metadata file version. Path: {}. Version: {}. 
Maximum expected version: {}", - metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); - - assertChar('\n', *buf); - - UInt32 remote_fs_objects_count; - readIntText(remote_fs_objects_count, *buf); - assertChar('\t', *buf); - readIntText(total_size, *buf); - assertChar('\n', *buf); - remote_fs_objects.resize(remote_fs_objects_count); - - for (size_t i = 0; i < remote_fs_objects_count; ++i) - { - String remote_fs_object_path; - size_t remote_fs_object_size; - readIntText(remote_fs_object_size, *buf); - assertChar('\t', *buf); - readEscapedString(remote_fs_object_path, *buf); - if (version == VERSION_ABSOLUTE_PATHS) - { - if (!remote_fs_object_path.starts_with(remote_fs_root_path)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, - "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", - remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); - - remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); - } - assertChar('\n', *buf); - remote_fs_objects[i].relative_path = remote_fs_object_path; - remote_fs_objects[i].bytes_size = remote_fs_object_size; - } - - readIntText(ref_count, *buf); - assertChar('\n', *buf); - - if (version >= VERSION_READ_ONLY_FLAG) - { - readBoolText(read_only, *buf); - assertChar('\n', *buf); - } -} - -/// Load metadata by path or create empty if `create` flag is set. 
-IDiskRemote::Metadata::Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_) - : remote_fs_root_path(remote_fs_root_path_) - , metadata_file_path(metadata_file_path_) - , metadata_disk(metadata_disk_) -{ -} - -void IDiskRemote::Metadata::addObject(const String & path, size_t size) -{ - total_size += size; - remote_fs_objects.emplace_back(path, size); -} - - -void IDiskRemote::Metadata::saveToBuffer(WriteBuffer & buf, bool sync) -{ - writeIntText(VERSION_RELATIVE_PATHS, buf); - writeChar('\n', buf); - - writeIntText(remote_fs_objects.size(), buf); - writeChar('\t', buf); - writeIntText(total_size, buf); - writeChar('\n', buf); - - for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) - { - writeIntText(remote_fs_object_size, buf); - writeChar('\t', buf); - writeEscapedString(remote_fs_object_path, buf); - writeChar('\n', buf); - } - - writeIntText(ref_count, buf); - writeChar('\n', buf); - - writeBoolText(read_only, buf); - writeChar('\n', buf); - - buf.finalize(); - if (sync) - buf.sync(); - -} - -/// Fsync metadata file if 'sync' flag is set. 
-void IDiskRemote::Metadata::save(bool sync) -{ - auto buf = metadata_disk->writeFile(metadata_file_path, 1024); - saveToBuffer(*buf, sync); -} - -std::string IDiskRemote::Metadata::serializeToString() -{ - WriteBufferFromOwnString write_buf; - saveToBuffer(write_buf, false); - return write_buf.str(); -} - -IDiskRemote::Metadata IDiskRemote::readMetadataUnlocked(const String & path, std::shared_lock &) const -{ - return Metadata::readMetadata(remote_fs_root_path, metadata_disk, path); -} - - -IDiskRemote::Metadata IDiskRemote::readMetadata(const String & path) const -{ - std::shared_lock lock(metadata_mutex); - return readMetadataUnlocked(path, lock); -} - -IDiskRemote::Metadata IDiskRemote::readUpdateAndStoreMetadata(const String & path, bool sync, IDiskRemote::MetadataUpdater updater) -{ - std::unique_lock lock(metadata_mutex); - return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); -} - - -IDiskRemote::Metadata IDiskRemote::readUpdateStoreMetadataAndRemove(const String & path, bool sync, IDiskRemote::MetadataUpdater updater) -{ - std::unique_lock lock(metadata_mutex); - return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater); -} - -IDiskRemote::Metadata IDiskRemote::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, IDiskRemote::MetadataUpdater updater) -{ - if (mode == WriteMode::Rewrite || !metadata_disk->exists(path)) - { - std::unique_lock lock(metadata_mutex); - return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); - } - else - { - return Metadata::readUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); - } -} - -IDiskRemote::Metadata IDiskRemote::createAndStoreMetadata(const String & path, bool sync) -{ - return Metadata::createAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync); -} - -IDiskRemote::Metadata 
IDiskRemote::createUpdateAndStoreMetadata(const String & path, bool sync, IDiskRemote::MetadataUpdater updater) -{ - return Metadata::createUpdateAndStoreMetadata(remote_fs_root_path, metadata_disk, path, sync, updater); -} - - -std::unordered_map IDiskRemote::getSerializedMetadata(const std::vector & file_paths) const -{ - std::unordered_map metadatas; - - std::shared_lock lock(metadata_mutex); - - for (const auto & path : file_paths) - { - IDiskRemote::Metadata metadata = readMetadataUnlocked(path, lock); - metadata.ref_count = 0; - metadatas[path] = metadata.serializeToString(); - } - - return metadatas; -} - -void IDiskRemote::removeMetadata(const String & path, std::vector & paths_to_remove) -{ - LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); - - if (!metadata_disk->exists(path)) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path); - - if (!metadata_disk->isFile(path)) - throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path); - - try - { - auto metadata_updater = [&paths_to_remove, this] (Metadata & metadata) - { - if (metadata.ref_count == 0) - { - for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) - { - - paths_to_remove.push_back(remote_fs_root_path + remote_fs_object_path); - - if (cache) - { - auto key = cache->hash(remote_fs_object_path); - cache->remove(key); - } - } - - return false; - } - else /// In other case decrement number of references, save metadata and delete hardlink. - { - --metadata.ref_count; - } - - return true; - }; - - readUpdateStoreMetadataAndRemove(path, false, metadata_updater); - /// If there is no references - delete content from remote FS. - } - catch (const Exception & e) - { - /// If it's impossible to read meta - just remove it from FS. - if (e.code() == ErrorCodes::UNKNOWN_FORMAT) - { - LOG_WARNING(log, - "Metadata file {} can't be read by reason: {}. 
Removing it forcibly.", - backQuote(path), e.nested() ? e.nested()->message() : e.message()); - metadata_disk->removeFile(path); - } - else - throw; - } -} - - -void IDiskRemote::removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove) -{ - checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. - - if (metadata_disk->isFile(path)) - { - removeMetadata(path, paths_to_remove[path]); - } - else - { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - removeMetadataRecursive(it->path(), paths_to_remove); - - metadata_disk->removeDirectory(path); - } -} - -std::vector IDiskRemote::getRemotePaths(const String & local_path) const -{ - auto metadata = readMetadata(local_path); - - std::vector remote_paths; - for (const auto & [remote_path, _] : metadata.remote_fs_objects) - remote_paths.push_back(fs::path(metadata.remote_fs_root_path) / remote_path); - - return remote_paths; -} - -void IDiskRemote::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) -{ - /// Protect against concurrent delition of files (for example because of a merge). 
- if (metadata_disk->isFile(local_path)) - { - try - { - paths_map.emplace_back(local_path, getRemotePaths(local_path)); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) - return; - throw; - } - } - else - { - DiskDirectoryIteratorPtr it; - try - { - it = iterateDirectory(local_path); - } - catch (const fs::filesystem_error & e) - { - if (e.code() == std::errc::no_such_file_or_directory) - return; - throw; - } - - for (; it->isValid(); it->next()) - IDiskRemote::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); - } -} - -DiskPtr DiskRemoteReservation::getDisk(size_t i) const -{ - if (i != 0) - throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); - return disk; -} - -void DiskRemoteReservation::update(UInt64 new_size) -{ - std::lock_guard lock(disk->reservation_mutex); - disk->reserved_bytes -= size; - size = new_size; - disk->reserved_bytes += size; -} - - -DiskRemoteReservation::~DiskRemoteReservation() -{ - try - { - std::lock_guard lock(disk->reservation_mutex); - if (disk->reserved_bytes < size) - { - disk->reserved_bytes = 0; - LOG_ERROR(disk->log, "Unbalanced reservations size for disk '{}'.", disk->getName()); - } - else - { - disk->reserved_bytes -= size; - } - - if (disk->reservation_count == 0) - LOG_ERROR(disk->log, "Unbalanced reservation count for disk '{}'.", disk->getName()); - else - --disk->reservation_count; - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - - -IDiskRemote::IDiskRemote( - const String & name_, - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - const String & log_name_, - size_t thread_pool_size) - : IDisk(std::make_unique(log_name_, thread_pool_size)) - , log(&Poco::Logger::get(log_name_)) - , name(name_) - , remote_fs_root_path(remote_fs_root_path_) - , metadata_disk(metadata_disk_) - , cache(cache_) -{ -} - - -String IDiskRemote::getCacheBasePath() const -{ - return cache ? cache->getBasePath() : ""; -} - - -bool IDiskRemote::exists(const String & path) const -{ - return metadata_disk->exists(path); -} - - -bool IDiskRemote::isFile(const String & path) const -{ - return metadata_disk->isFile(path); -} - - -void IDiskRemote::createFile(const String & path) -{ - createAndStoreMetadata(path, false); -} - - -size_t IDiskRemote::getFileSize(const String & path) const -{ - return readMetadata(path).total_size; -} - - -void IDiskRemote::moveFile(const String & from_path, const String & to_path) -{ - if (exists(to_path)) - throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); - - metadata_disk->moveFile(from_path, to_path); -} - - -void IDiskRemote::replaceFile(const String & from_path, const String & to_path) -{ - if (exists(to_path)) - { - const String tmp_path = to_path + ".old"; - moveFile(to_path, tmp_path); - moveFile(from_path, to_path); - removeFile(tmp_path); - } - else - moveFile(from_path, to_path); -} - -void IDiskRemote::removeSharedFile(const String & path, bool delete_metadata_only) -{ - std::vector paths_to_remove; - removeMetadata(path, paths_to_remove); - - if (!delete_metadata_only) - removeFromRemoteFS(paths_to_remove); -} - -void IDiskRemote::removeSharedFileIfExists(const String & path, bool delete_metadata_only) -{ - std::vector paths_to_remove; - if (metadata_disk->exists(path)) - { - removeMetadata(path, paths_to_remove); - if (!delete_metadata_only) - 
removeFromRemoteFS(paths_to_remove); - } -} - -void IDiskRemote::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - std::unordered_map> paths_to_remove; - for (const auto & file : files) - { - bool skip = file.if_exists && !metadata_disk->exists(file.path); - if (!skip) - removeMetadata(file.path, paths_to_remove[file.path]); - } - - if (!keep_all_batch_data) - { - std::vector remove_from_remote; - for (auto && [path, remote_paths] : paths_to_remove) - { - if (!file_names_remove_metadata_only.contains(fs::path(path).filename())) - remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); - } - removeFromRemoteFS(remove_from_remote); - } -} - -void IDiskRemote::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - std::unordered_map> paths_to_remove; - removeMetadataRecursive(path, paths_to_remove); - - if (!keep_all_batch_data) - { - std::vector remove_from_remote; - for (auto && [local_path, remote_paths] : paths_to_remove) - { - if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) - remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); - } - removeFromRemoteFS(remove_from_remote); - } -} - - -void IDiskRemote::setReadOnly(const String & path) -{ - /// We should store read only flag inside metadata file (instead of using FS flag), - /// because we modify metadata file when create hard-links from it. 
- readUpdateAndStoreMetadata(path, false, [] (Metadata & metadata) { metadata.read_only = true; return true; }); -} - - -bool IDiskRemote::isDirectory(const String & path) const -{ - return metadata_disk->isDirectory(path); -} - - -void IDiskRemote::createDirectory(const String & path) -{ - metadata_disk->createDirectory(path); -} - - -void IDiskRemote::createDirectories(const String & path) -{ - metadata_disk->createDirectories(path); -} - - -void IDiskRemote::clearDirectory(const String & path) -{ - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - if (isFile(it->path())) - removeFile(it->path()); -} - - -void IDiskRemote::removeDirectory(const String & path) -{ - metadata_disk->removeDirectory(path); -} - - -DiskDirectoryIteratorPtr IDiskRemote::iterateDirectory(const String & path) -{ - return metadata_disk->iterateDirectory(path); -} - - -void IDiskRemote::listFiles(const String & path, std::vector & file_names) -{ - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - file_names.push_back(it->name()); -} - - -void IDiskRemote::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ - metadata_disk->setLastModified(path, timestamp); -} - - -Poco::Timestamp IDiskRemote::getLastModified(const String & path) -{ - return metadata_disk->getLastModified(path); -} - - -void IDiskRemote::createHardLink(const String & src_path, const String & dst_path) -{ - readUpdateAndStoreMetadata(src_path, false, [] (Metadata & metadata) { metadata.ref_count++; return true; }); - - /// Create FS hardlink to metadata file. 
- metadata_disk->createHardLink(src_path, dst_path); -} - - -ReservationPtr IDiskRemote::reserve(UInt64 bytes) -{ - if (!tryReserve(bytes)) - return {}; - - return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); -} - - -bool IDiskRemote::tryReserve(UInt64 bytes) -{ - std::lock_guard lock(reservation_mutex); - if (bytes == 0) - { - LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); - ++reservation_count; - return true; - } - - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); - if (unreserved_space >= bytes) - { - LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", - ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); - ++reservation_count; - reserved_bytes += bytes; - return true; - } - return false; -} - -String IDiskRemote::getUniqueId(const String & path) const -{ - LOG_TRACE(log, "Remote path: {}, Path: {}", remote_fs_root_path, path); - auto metadata = readMetadata(path); - String id; - if (!metadata.remote_fs_objects.empty()) - id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].relative_path; - return id; -} - - -AsynchronousReaderPtr IDiskRemote::getThreadPoolReader() -{ - constexpr size_t pool_size = 50; - constexpr size_t queue_size = 1000000; - static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size); - return reader; -} - -UInt32 IDiskRemote::getRefCount(const String & path) const -{ - return readMetadata(path).ref_count; -} - -ThreadPool & IDiskRemote::getThreadPoolWriter() -{ - constexpr size_t pool_size = 100; - constexpr size_t queue_size = 1000000; - static ThreadPool writer(pool_size, pool_size, queue_size); - return writer; -} - -} diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h deleted file mode 100644 index 327452c0bbf..00000000000 --- a/src/Disks/IDiskRemote.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once - -#include - -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric DiskSpaceReservedForMerge; -} - -namespace DB -{ - -class IAsynchronousReader; -using AsynchronousReaderPtr = std::shared_ptr; - - -/// Base Disk class for remote FS's, which are not posix-compatible (e.g. DiskS3, DiskHDFS, DiskBlobStorage) -class IDiskRemote : public IDisk -{ - -friend class DiskRemoteReservation; - -public: - IDiskRemote( - const String & name_, - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - FileCachePtr cache_, - const String & log_name_, - size_t thread_pool_size); - - struct Metadata; - using MetadataUpdater = std::function; - - const String & getName() const final override { return name; } - - const String & getPath() const final override { return metadata_disk->getPath(); } - - String getCacheBasePath() const final override; - - std::vector getRemotePaths(const String & local_path) const final override; - - void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; - - /// Methods for working with metadata. For some operations (like hardlink - /// creation) metadata can be updated concurrently from multiple threads - /// (file actually rewritten on disk). So additional RW lock is required for - /// metadata read and write, but not for create new metadata. 
- Metadata readMetadata(const String & path) const; - Metadata readMetadataUnlocked(const String & path, std::shared_lock &) const; - Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater); - Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater); - - Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater); - - Metadata createAndStoreMetadata(const String & path, bool sync); - Metadata createUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater); - - UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } - - UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } - - UInt64 getKeepingFreeSpace() const override { return 0; } - - bool exists(const String & path) const override; - - bool isFile(const String & path) const override; - - void createFile(const String & path) override; - - size_t getFileSize(const String & path) const override; - - void moveFile(const String & from_path, const String & to_path) override; - - void replaceFile(const String & from_path, const String & to_path) override; - - void removeFile(const String & path) override { removeSharedFile(path, false); } - - void removeFileIfExists(const String & path) override { removeSharedFileIfExists(path, false); } - - void removeRecursive(const String & path) override { removeSharedRecursive(path, false, {}); } - - - void removeSharedFile(const String & path, bool delete_metadata_only) override; - - void removeSharedFileIfExists(const String & path, bool delete_metadata_only) override; - - void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - - void removeSharedRecursive(const String & path, bool keep_all_batch_data, 
const NameSet & file_names_remove_metadata_only) override; - - void listFiles(const String & path, std::vector & file_names) override; - - void setReadOnly(const String & path) override; - - bool isDirectory(const String & path) const override; - - void createDirectory(const String & path) override; - - void createDirectories(const String & path) override; - - void clearDirectory(const String & path) override; - - void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } - - void removeDirectory(const String & path) override; - - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; - - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - - Poco::Timestamp getLastModified(const String & path) override; - - void createHardLink(const String & src_path, const String & dst_path) override; - - ReservationPtr reserve(UInt64 bytes) override; - - String getUniqueId(const String & path) const override; - - bool checkUniqueId(const String & id) const override = 0; - - virtual void removeFromRemoteFS(const std::vector & paths) = 0; - - static AsynchronousReaderPtr getThreadPoolReader(); - - static ThreadPool & getThreadPoolWriter(); - - DiskPtr getMetadataDiskIfExistsOrSelf() override { return metadata_disk; } - - UInt32 getRefCount(const String & path) const override; - - /// Return metadata for each file path. Also, before serialization reset - /// ref_count for each metadata to zero. This function used only for remote - /// fetches/sends in replicated engines. That's why we reset ref_count to zero. 
- std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; -protected: - Poco::Logger * log; - const String name; - const String remote_fs_root_path; - - DiskPtr metadata_disk; - - FileCachePtr cache; - -private: - void removeMetadata(const String & path, std::vector & paths_to_remove); - - void removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove); - - bool tryReserve(UInt64 bytes); - - UInt64 reserved_bytes = 0; - UInt64 reservation_count = 0; - std::mutex reservation_mutex; - mutable std::shared_mutex metadata_mutex; -}; - -using RemoteDiskPtr = std::shared_ptr; - -/// Remote FS (S3, HDFS) metadata file layout: -/// FS objects, their number and total size of all FS objects. -/// Each FS object represents a file path in remote FS and its size. - -struct IDiskRemote::Metadata -{ - using Updater = std::function; - /// Metadata file version. - static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; - static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; - static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; - - /// Remote FS objects paths and their sizes. - std::vector remote_fs_objects; - - /// URI - const String & remote_fs_root_path; - - /// Relative path to metadata file on local FS. - const String metadata_file_path; - - DiskPtr metadata_disk; - - /// Total size of all remote FS (S3, HDFS) objects. - size_t total_size = 0; - - /// Number of references (hardlinks) to this metadata file. - /// - /// FIXME: Why we are tracking it explicetly, without - /// info from filesystem???? - UInt32 ref_count = 0; - - /// Flag indicates that file is read only. 
- bool read_only = false; - - Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_); - - void addObject(const String & path, size_t size); - - static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); - static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - - static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); - static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); - - /// Serialize metadata to string (very same with saveToBuffer) - std::string serializeToString(); - -private: - /// Fsync metadata file if 'sync' flag is set. 
- void save(bool sync = false); - void saveToBuffer(WriteBuffer & buffer, bool sync); - void load(); -}; - -class DiskRemoteReservation final : public IReservation -{ -public: - DiskRemoteReservation(const RemoteDiskPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) - { - } - - UInt64 getSize() const override { return size; } - - DiskPtr getDisk(size_t i) const override; - - Disks getDisks() const override { return {disk}; } - - void update(UInt64 new_size) override; - - ~DiskRemoteReservation() override; - -private: - RemoteDiskPtr disk; - UInt64 size; - CurrentMetrics::Increment metric_increment; -}; - - -/// Runs tasks asynchronously using thread pool. -class AsyncExecutor : public Executor -{ -public: - explicit AsyncExecutor(const String & name_, int thread_pool_size) - : name(name_) - , pool(ThreadPool(thread_pool_size)) {} - - std::future execute(std::function task) override - { - auto promise = std::make_shared>(); - pool.scheduleOrThrowOnError( - [promise, task]() - { - try - { - task(); - promise->set_value(); - } - catch (...) - { - tryLogCurrentException("Failed to run async task"); - - try - { - promise->set_exception(std::current_exception()); - } - catch (...) 
{} - } - }); - - return promise->get_future(); - } - - void setMaxThreads(size_t threads) - { - pool.setMaxThreads(threads); - } - -private: - String name; - ThreadPool pool; -}; - -} diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 57b72d0190d..c2a317b43b0 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -1,6 +1,5 @@ #include "ReadBufferFromRemoteFSGather.h" -#include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index ba477ced601..e7eb6296a19 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -1,9 +1,9 @@ #pragma once #include -#include #include #include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index a0669be411f..64495a538e4 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -2,7 +2,6 @@ #include #include -#include #include diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index b2d5f11724a..0d5513e4c01 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h index 84bd2b99c7e..38a1872bb45 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h @@ -2,7 +2,6 @@ #include -#include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 34f396b978c..5f725d92447 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,7 +29,7 @@ #include #include #include 
-#include +#include #include #include #include @@ -313,7 +313,7 @@ struct ContextSharedPart /// since it may use per-user MemoryTracker which will be destroyed here. try { - IDiskRemote::getThreadPoolWriter().wait(); + IObjectStorage::getThreadPoolWriter().wait(); } catch (...) { diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 064447c54ad..09c8fe3a6ab 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index fb6a055c6e5..5cc79c1ceee 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB { From 1a21ec9026d81b3618ca6dde354da3c9b180ce30 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 May 2022 21:13:36 +0300 Subject: [PATCH 297/615] Update Client.cpp --- programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 96f10de9781..cbbf195a68c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -69,7 +69,7 @@ void Client::processError(const String & query) const { if (server_exception) { - fmt::print(stderr, "Received exception from the server (version {}):\n{}\n", + fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); if (is_interactive) From e278bfa81ece15e38c7a9b7c056b66e81e5131db Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 19 May 2022 21:03:49 +0200 Subject: [PATCH 298/615] Fix fast test build --- src/Disks/AzureObjectStorage.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Disks/AzureObjectStorage.cpp 
b/src/Disks/AzureObjectStorage.cpp index 68f7f63638a..7118d3e0c01 100644 --- a/src/Disks/AzureObjectStorage.cpp +++ b/src/Disks/AzureObjectStorage.cpp @@ -1,4 +1,7 @@ #include + +#if USE_AZURE_BLOB_STORAGE + #include #include #include @@ -211,3 +214,5 @@ std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std } } + +#endif From 121571ec5d2a197254b26196367659a4988f0f64 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 19 May 2022 22:37:54 +0200 Subject: [PATCH 299/615] Ugly hacks for performance.tgz artifacts --- tests/ci/build_check.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 749a416ecad..2e04ea11487 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -148,19 +148,21 @@ def create_json_artifact( json.dump(result, build_links) -def get_release_or_pr( - pr_info: PRInfo, build_config: BuildConfig, version: ClickHouseVersion -) -> str: +def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]: + # FIXME performance + # performance builds are havily relies on a fixed path for artifacts, that's why + # we need to preserve 0 for anything but PR number + # It should be fixed in performance-comparison image eventually + performance_pr = "0" if "release" in pr_info.labels or "release-lts" in pr_info.labels: # for release pull requests we use branch names prefixes, not pr numbers - return pr_info.head_ref - elif pr_info.number == 0 and build_config["package_type"] != "performance": - # for pushes to master - major version, but not for performance builds - # they havily relies on a fixed path for build package and nobody going - # to deploy them somewhere, so it's ok. 
- return f"{version.major}.{version.minor}" + return pr_info.head_ref, performance_pr + elif pr_info.number == 0: + # for pushes to master - major version + return f"{version.major}.{version.minor}", performance_pr # PR number for anything else - return str(pr_info.number) + pr_number = str(pr_info.number) + return pr_number, pr_number def upload_master_static_binaries( @@ -201,9 +203,13 @@ def main(): s3_helper = S3Helper("https://s3.amazonaws.com") version = get_version_from_repo(git=Git(True)) - release_or_pr = get_release_or_pr(pr_info, build_config, version) + release_or_pr, performance_pr = get_release_or_pr(pr_info, version) s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) + # FIXME performance + s3_performance_path = "/".join( + (performance_pr, pr_info.sha, build_name, "performance.tgz") + ) # If this is rerun, then we try to find already created artifacts and just # put them as github actions artifcat (result) @@ -314,6 +320,19 @@ def main(): else: logging.info("Build log doesn't exist") + # FIXME performance + performance_url = [] + performance_path = os.path.join(build_output_path, "performance.tgz") + if os.path.exists(performance_path): + performance_url.append( + s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) + ) + logging.info( + "Uploaded performance.tgz to %s, now delete to avoid duplication", + performance_url[0], + ) + os.remove(performance_path) + build_urls = s3_helper.upload_build_folder_to_s3( build_output_path, s3_path_prefix, From de39a48c33d92b3681e20cc44321cd0a4a01c4b0 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 22:44:10 +0200 Subject: [PATCH 300/615] Append performance_url to build_urls --- tests/ci/build_check.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 2e04ea11487..f8397bf3e76 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -321,23 +321,26 @@ def main(): logging.info("Build log doesn't exist") # FIXME performance - performance_url = [] + performance_urls = [] performance_path = os.path.join(build_output_path, "performance.tgz") if os.path.exists(performance_path): - performance_url.append( + performance_urls.append( s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) ) logging.info( "Uploaded performance.tgz to %s, now delete to avoid duplication", - performance_url[0], + performance_urls[0], ) os.remove(performance_path) - build_urls = s3_helper.upload_build_folder_to_s3( - build_output_path, - s3_path_prefix, - keep_dirs_in_s3_path=False, - upload_symlinks=False, + build_urls = ( + s3_helper.upload_build_folder_to_s3( + build_output_path, + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) + + performance_urls ) logging.info("Got build URLs %s", build_urls) From de23fdccf60bbcf4ae6951989fd15167ae181294 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 19 May 2022 23:00:30 +0200 Subject: [PATCH 301/615] Fix get_release_or_pr execution in docker_server --- tests/ci/docker_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 988771b1577..2fcd0172112 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -292,7 +292,7 @@ def main(): pr_info = None if CI: pr_info = PRInfo() - release_or_pr = get_release_or_pr(pr_info, {"package_type": ""}, args.version) + release_or_pr = get_release_or_pr(pr_info, args.version) args.bucket_prefix = ( f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/" f"{release_or_pr}/{pr_info.sha}" From 39097a4cfcdb5a76292a883dc350e16207fd92d3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 20 May 2022 00:06:44 +0200 Subject: [PATCH 302/615] Fix automatic bucket_prefix, exit 1 on failures --- tests/ci/docker_server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 2fcd0172112..97b901617f9 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -5,6 +5,7 @@ import argparse import json import logging import subprocess +import sys from os import path as p, makedirs from typing import List, Tuple @@ -292,7 +293,7 @@ def main(): pr_info = None if CI: pr_info = PRInfo() - release_or_pr = get_release_or_pr(pr_info, args.version) + release_or_pr, _ = get_release_or_pr(pr_info, args.version) args.bucket_prefix = ( f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/" f"{release_or_pr}/{pr_info.sha}" @@ -350,6 +351,8 @@ def main(): ) ch_helper = ClickHouseHelper() ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + if status != "success": + sys.exit(1) if __name__ == "__main__": From 441cc88e763611302efbcd3be101fd35f27bbf81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 May 2022 02:22:09 +0300 Subject: [PATCH 303/615] Update 
CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9eec13c884c..e92f93b54ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,6 @@ * New single binary based diagnostics tool (clickhouse-diagnostics). [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). * Add output format `Prometheus` [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). * Add `MySQLDump` input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)). -* Window function `nth_value` is added. [#36601](https://github.com/ClickHouse/ClickHouse/pull/36601) ([Nikolay](https://github.com/ndchikin)). * Show the `total_rows` and `total_bytes` fields in `system.tables` for temporary tables. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)). * Allow to override `parts_to_delay_insert` and `parts_to_throw_insert` with query-level settings. If they are defined, they will override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)). 
From a02115f104f20570e6c3f03862ed1aff5d04facc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 May 2022 02:50:38 +0300 Subject: [PATCH 304/615] Update 02304_grouping_sets_with_rollup_cube.sql --- .../0_stateless/02304_grouping_sets_with_rollup_cube.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql index 25263edc980..524ef363b6b 100644 --- a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql +++ b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql @@ -1,3 +1,5 @@ +-- Tags: no-backward-compatibility-check:22.4 + SELECT number FROM From 7c9df33bf8c65d41543cc7a1839abc482eb11c25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 May 2022 03:12:44 +0300 Subject: [PATCH 305/615] Update warnings.cmake --- cmake/warnings.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 3a6b44b9170..4b8f83df090 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -16,7 +16,10 @@ if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE endif () if (COMPILER_CLANG) + # Add some warnings that are not available even with -Wall -Wextra -Wpedantic. + # We want to get everything out of the compiler for code quality. 
add_warning(everything) + add_warning(pedantic) no_warning(vla-extension) no_warning(zero-length-array) From c3c468af86dc8619c506d79d69b77b35f7a3c3d8 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 19 May 2022 21:16:27 -0300 Subject: [PATCH 306/615] Update random-functions.md --- .../sql-reference/functions/random-functions.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 5e20a93da1f..3931898f081 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -96,10 +96,14 @@ SELECT fuzzBits(materialize('abacaba'), 0.1) FROM numbers(3) ``` -\`\`\` text -┌─fuzzBits(materialize(‘abacaba’), 0.1)─┐ -│ abaaaja │ -│ a\*cjab+ │ -│ aeca2A │ -└───────────────────────────────────────┘ +Result: + +``` text +┌─fuzzBits(materialize('abacaba'), 0.1)─┐ +│ abaaaja │ +│ a*cjab+ │ +│ aeca2A │ +└───────────────────────────────────────┘ +``` + From 03e3be7b1af8f0a7bdb78666a8b7c41ea0923dc8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 May 2022 02:38:50 +0200 Subject: [PATCH 307/615] Maybe fix error --- programs/main.cpp | 52 ++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/programs/main.cpp b/programs/main.cpp index 243159d9bf5..bee1496095c 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -355,31 +355,7 @@ void setUserAndGroup() static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw. std::unique_ptr buf(new char[buf_size]); - const char * env_uid = getenv("CLICKHOUSE_SETUID"); - if (env_uid && env_uid[0]) - { - /// Is it numeric id or name? 
- uid_t uid = 0; - if (!tryParse(uid, env_uid) || uid == 0) - { - passwd entry{}; - passwd * result{}; - - if (0 != getpwnam_r(env_uid, &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); - - if (!result) - throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); - - uid = entry.pw_uid; - } - - if (uid == 0) - throw Exception("User specified in the CLICKHOUSE_SETUID environment variable has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); - - if (0 != setuid(uid)) - throwFromErrno(fmt::format("Cannot do 'setuid' to user, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); - } + /// Set the group first, because if we set user, the privileges will be already dropped and we will not be able to set the group later. const char * env_gid = getenv("CLICKHOUSE_SETGID"); if (env_gid && env_gid[0]) @@ -405,6 +381,32 @@ void setUserAndGroup() if (0 != setgid(gid)) throwFromErrno(fmt::format("Cannot do 'setgid' to user, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); } + + const char * env_uid = getenv("CLICKHOUSE_SETUID"); + if (env_uid && env_uid[0]) + { + /// Is it numeric id or name? 
+ uid_t uid = 0; + if (!tryParse(uid, env_uid) || uid == 0) + { + passwd entry{}; + passwd * result{}; + + if (0 != getpwnam_r(env_uid, &entry, buf.get(), buf_size, &result)) + throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); + + if (!result) + throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + + uid = entry.pw_uid; + } + + if (uid == 0) + throw Exception("User specified in the CLICKHOUSE_SETUID environment variable has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + + if (0 != setuid(uid)) + throwFromErrno(fmt::format("Cannot do 'setuid' to user, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); + } } From a8b2f1211106c52ad149f8aa7481da3ca8b3378b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 19 May 2022 21:55:41 -0400 Subject: [PATCH 308/615] allow SALT in sha256_hash --- src/Parsers/Access/ParserCreateUserQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index b7d49be879e..532a1bae47f 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -46,7 +46,7 @@ namespace } - bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, bool id_mode, AuthenticationData & auth_data) + bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, bool /*id_mode*/, AuthenticationData & auth_data) { return IParserBase::wrapParseImpl(pos, [&] { @@ -120,7 +120,7 @@ namespace return false; value = ast->as().value.safeGet(); - if (id_mode && expect_hash) + if (expect_hash) { if (ParserKeyword{"SALT"}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, 
ast, expected)) { From b7bc40ce58f7560a996c63e04f330c414e47311e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 19 May 2022 22:16:08 -0400 Subject: [PATCH 309/615] documentation updated --- docs/en/sql-reference/statements/create/user.md | 4 +++- docs/ru/sql-reference/statements/create/user.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 0aad0961a8b..32f2d06bb6d 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -29,12 +29,14 @@ There are multiple ways of user identification: - `IDENTIFIED WITH no_password` - `IDENTIFIED WITH plaintext_password BY 'qwerty'` - `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +For identifacation with sha256_hash using `SALT` - hash must be calculated from concatination of 'user' and 'salt'. + ## User Host {#user-host} User host is a host from which a connection to ClickHouse server could be established. 
The host can be specified in the `HOST` query section in the following ways: diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 78c481e8eb7..288bc447250 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -29,12 +29,14 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] - `IDENTIFIED WITH no_password` - `IDENTIFIED WITH plaintext_password BY 'qwerty'` - `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +Для идентификации с sha256_hash используя `SALT` - хэш должен быть вычислен от конкатенации 'user' и 'salt'. + ## Пользовательский хост Пользовательский хост — это хост, с которого можно установить соединение с сервером ClickHouse. 
Хост задается в секции `HOST` следующими способами: From 442af375bc9adf5ee492a36a5026a1eeeedca721 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Fri, 20 May 2022 11:54:12 +0800 Subject: [PATCH 310/615] empty From a5e421581d80827a955fe747eb7548646e7faabc Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 20 May 2022 01:30:45 -0400 Subject: [PATCH 311/615] docs fix --- docs/en/sql-reference/statements/create/user.md | 2 +- docs/ru/sql-reference/statements/create/user.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 32f2d06bb6d..588344520af 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -35,7 +35,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` -For identifacation with sha256_hash using `SALT` - hash must be calculated from concatination of 'user' and 'salt'. +For identifacation with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'. ## User Host {#user-host} diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 288bc447250..d7da1748821 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -35,7 +35,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` -Для идентификации с sha256_hash используя `SALT` - хэш должен быть вычислен от конкатенации 'user' и 'salt'. +Для идентификации с sha256_hash используя `SALT` - хэш должен быть вычислен от конкатенации 'password' и 'salt'. 
## Пользовательский хост From 4885940378e204ee0deabe4029dcdea3f0f2de0d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 19 May 2022 17:40:06 +0200 Subject: [PATCH 312/615] Removed ENABLE_MKL option, because we cannot depend on non-contrib libs --- contrib/eigen-cmake/CMakeLists.txt | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/contrib/eigen-cmake/CMakeLists.txt b/contrib/eigen-cmake/CMakeLists.txt index 6bdf3ab7c35..f462d5cdc93 100644 --- a/contrib/eigen-cmake/CMakeLists.txt +++ b/contrib/eigen-cmake/CMakeLists.txt @@ -2,20 +2,6 @@ set(EIGEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/eigen") add_library (_eigen INTERFACE) -option (ENABLE_MKL "Build Eigen with Intel MKL" OFF) -if (ENABLE_MKL) - set(MKL_THREADING sequential) - set(MKL_INTERFACE lp64) - find_package(MKL REQUIRED) - if (MKL_FOUND) - message("MKL INCLUDE: ${MKL_INCLUDE}") - message("MKL LIBRARIES: ${MKL_LIBRARIES}") - target_compile_definitions(_eigen INTERFACE EIGEN_USE_MKL_ALL) - target_include_directories(_eigen INTERFACE ${MKL_INCLUDE}) - target_link_libraries(_eigen INTERFACE ${MKL_LIBRARIES}) - endif() -endif() - # Only include MPL2 code from Eigen library target_compile_definitions(_eigen INTERFACE EIGEN_MPL2_ONLY) From 43509353773a05f1e7e695d737611a9df6c8fded Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 19 May 2022 17:50:13 +0200 Subject: [PATCH 313/615] Re-enable SSE2 for PowerPC --- cmake/cpu_features.cmake | 3 ++- contrib/eigen-cmake/CMakeLists.txt | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 37e6c356265..fd4cc51b6f9 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -32,7 +32,8 @@ elseif (ARCH_AARCH64) set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc") elseif (ARCH_PPC64LE) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 
-DNO_WARN_X86_INTRINSICS") + # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") elseif (ARCH_AMD64) set (TEST_FLAG "-mssse3") diff --git a/contrib/eigen-cmake/CMakeLists.txt b/contrib/eigen-cmake/CMakeLists.txt index f462d5cdc93..a37d341109c 100644 --- a/contrib/eigen-cmake/CMakeLists.txt +++ b/contrib/eigen-cmake/CMakeLists.txt @@ -5,5 +5,12 @@ add_library (_eigen INTERFACE) # Only include MPL2 code from Eigen library target_compile_definitions(_eigen INTERFACE EIGEN_MPL2_ONLY) +# Clang by default mimics gcc 4.2.1 compatibility but Eigen checks __GNUC__ version to enable +# a workaround for bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 fixed in 6.3 +# So we fake gcc > 6.3 when building with clang +if (COMPILER_CLANG AND ARCH_PPC64LE) + target_compile_options(_eigen INTERFACE -fgnuc-version=6.4) +endif() + target_include_directories (_eigen SYSTEM INTERFACE ${EIGEN_LIBRARY_DIR}) add_library(ch_contrib::eigen ALIAS _eigen) From 7c0e467b051ae88562989e3dd8332ff4efc9b1d5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 20 May 2022 09:46:19 +0200 Subject: [PATCH 314/615] Small update of comment for SOURCE_DATE_EPOCH workaround --- cmake/ccache.cmake | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index b20706efa5e..e300b4c45e8 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -35,16 +35,17 @@ endif() message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") set(LAUNCHER ${CCACHE_EXECUTABLE}) -# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is -# filled from the debian/changelog or current time. +# Work around a well-intended but unfortunate behavior of ccache 4.0 & 4.1 with +# environment variable SOURCE_DATE_EPOCH. 
This variable provides an alternative +# to source-code embedded timestamps (__DATE__/__TIME__) and therefore helps with +# reproducible builds (*). SOURCE_DATE_EPOCH is set automatically by the +# distribution, e.g. Debian. Ccache 4.0 & 4.1 incorporate SOURCE_DATE_EPOCH into +# the hash calculation regardless they contain timestamps or not. This invalidates +# the cache whenever SOURCE_DATE_EPOCH changes. As a fix, ignore SOURCE_DATE_EPOCH. # -# - 4.0+ ccache always includes this environment variable into the hash -# of the manifest, which do not allow to use previous cache, -# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ -# -# Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2). +# (*) https://reproducible-builds.org/specs/source-date-epoch/ if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") - message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") + message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache 4.1 / 4.2") set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) endif() From 9b0acaa0cae54b552eb6a613eae8414dde2d3c72 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 20 May 2022 09:57:25 +0200 Subject: [PATCH 315/615] Update cmake/ccache.cmake Co-authored-by: Azat Khuzhin --- cmake/ccache.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index e300b4c45e8..f0769f337d0 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -45,7 +45,7 @@ set(LAUNCHER ${CCACHE_EXECUTABLE}) # # (*) https://reproducible-builds.org/specs/source-date-epoch/ if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") - message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache 4.1 / 4.2") + message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache 4.0 / 4.1") set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) endif() From cdbe6672cb7284cadf66ef3ac02fb3cc3fe9b99c Mon Sep 17 00:00:00 2001 From: Vladimir 
Chebotarev Date: Fri, 20 May 2022 11:35:18 +0300 Subject: [PATCH 316/615] Added one more symbol and proper comment. --- src/IO/S3/PocoHTTPClient.cpp | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index b9b20106465..e7fa36e5e41 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -191,33 +191,18 @@ void PocoHTTPClient::makeRequestInternal( Poco::Net::HTTPRequest poco_request(Poco::Net::HTTPRequest::HTTP_1_1); - /** Aws::Http::URI will encode URL in appropriate way for AWS S3 server. - * Poco::URI also does that correctly but it's not compatible with AWS. - * For example, `+` symbol will not be converted to `%2B` by Poco and would - * be received as space symbol. - * - * References: - * https://github.com/aws/aws-sdk-java/issues/1946 - * https://forums.aws.amazon.com/thread.jspa?threadID=55746 - * - * Example: - * Suppose we are requesting a file: abc+def.txt - * To correctly do it, we need to construct an URL containing either: - * - abc%2Bdef.txt - * this is also technically correct: - * - abc+def.txt - * but AWS servers don't support it properly, interpreting plus character as whitespace - * although it is in path part, not in query string. - * e.g. this is not correct: - * - abc%20def.txt - * - * Poco will keep plus character as is (which is correct) while AWS servers will treat it as whitespace, which is not what is intended. - * To overcome this limitation, we encode URL with "Aws::Http::URI" and then pass already prepared URL to Poco. + /** According to RFC-2616, Request-URI is allowed to be encoded. + * However, there is no clear agreement on which exact symbols must be encoded. + * Effectively, `Poco::URI` chooses smaller subset of characters to encode, + * whereas Amazon S3 and Google Cloud Storage expects another one. 
+ * In order to successfully execute a request, a path must be exact representation + * of decoded path used by `S3AuthSigner`. + * Therefore we shall encode some symbols "manually" to fit the signatures. */ std::string path_and_query; const std::string & query = target_uri.getRawQuery(); - const std::string reserved = "?#:;+@&="; /// Poco::URI::RESERVED_QUERY_PARAM without '/'. + const std::string reserved = "?#:;+@&=%"; /// Poco::URI::RESERVED_QUERY_PARAM without '/' plus percent sign. Poco::URI::encode(target_uri.getPath(), reserved, path_and_query); if (!query.empty()) { From 6ff221a822b654025a518b9df4abe1c217f57cc6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 12:07:34 +0200 Subject: [PATCH 317/615] Merge with master --- src/Disks/DiskObjectStorage.cpp | 15 +++++++++------ src/Disks/DiskObjectStorage.h | 5 ++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 4b0134b2d07..012a6d5b4c9 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -720,27 +720,30 @@ void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all } } -bool DiskObjectStorage::tryReserve(UInt64 bytes) +std::optional DiskObjectStorage::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (bytes == 0) { LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); ++reservation_count; - return true; + return {unreserved_space}; } - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - 
return true; + return {unreserved_space - bytes}; } - return false; + + return {}; } std::unique_ptr DiskObjectStorage::readFile( diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/DiskObjectStorage.h index 5ae014db6ca..7ddd3fa6798 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/DiskObjectStorage.h @@ -253,7 +253,10 @@ class DiskObjectStorageReservation final : public IReservation { public: DiskObjectStorageReservation(const std::shared_ptr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + : disk(disk_) + , size(size_) + , metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + {} UInt64 getSize() const override { return size; } From 12bbb7de87e005f9507863a056d692a6dcaf5a76 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 20 May 2022 12:41:44 +0200 Subject: [PATCH 318/615] fix race on TID allocation --- base/base/defines.h | 19 +++++++ src/Common/Exception.cpp | 15 ++++- src/Common/Exception.h | 6 +- src/Interpreters/DDLWorker.cpp | 2 +- src/Interpreters/MergeTreeTransaction.cpp | 6 +- src/Interpreters/TransactionLog.cpp | 49 +++++++++------- .../TransactionVersionMetadata.cpp | 30 +++++----- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 20 +++---- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../01133_begin_commit_race.reference | 0 .../0_stateless/01133_begin_commit_race.sh | 56 +++++++++++++++++++ 11 files changed, 148 insertions(+), 57 deletions(-) create mode 100644 tests/queries/0_stateless/01133_begin_commit_race.reference create mode 100755 tests/queries/0_stateless/01133_begin_commit_race.sh diff --git a/base/base/defines.h b/base/base/defines.h index bd98e99f5b9..084e710abf6 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -105,6 +105,25 @@ # define ASAN_POISON_MEMORY_REGION(a, b) #endif +#if !defined(ABORT_ON_LOGICAL_ERROR) + #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || 
defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) + #define ABORT_ON_LOGICAL_ERROR + #endif +#endif + +/// chassert(x) is similar to assert(x), but: +/// - works in builds with sanitizers, not only in debug builds +/// - tries to print failed assertion into server log +/// It can be used for all assertions except heavy ones. +/// Heavy assertions (that run loops or call complex functions) are allowed in debug builds only. +#if !defined(chassert) + #if defined(ABORT_ON_LOGICAL_ERROR) + #define chassert(x) static_cast(x) ? void(0) : abortOnFailedAssertion(#x) + #else + #define chassert(x) ((void)0) + #endif +#endif + /// A template function for suppressing warnings about unused variables or function results. template constexpr void UNUSED(Args &&... args [[maybe_unused]]) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 21f605ad353..d0f7af2da6b 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -35,6 +35,18 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } +void abortOnFailedAssertion(const String & description) +{ + LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description); + + /// This is to suppress -Wmissing-noreturn + volatile bool always_false = false; + if (always_false) + return; + + abort(); +} + /// - Aborts the process if error code is LOGICAL_ERROR. /// - Increments error codes statistics. 
void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace) @@ -44,8 +56,7 @@ void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", msg); - abort(); + abortOnFailedAssertion(msg); } #endif diff --git a/src/Common/Exception.h b/src/Common/Exception.h index b2fc369237e..7a96aee555c 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -12,16 +12,14 @@ #include -#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) -#define ABORT_ON_LOGICAL_ERROR -#endif - namespace Poco { class Logger; } namespace DB { +void abortOnFailedAssertion(const String & description); + class Exception : public Poco::Exception { public: diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 0e12c5e9e5a..2b2de84c314 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -287,7 +287,7 @@ void DDLWorker::scheduleTasks(bool reinitialized) Strings queue_nodes = zookeeper->getChildren(queue_dir, &queue_node_stat, queue_updated_event); size_t size_before_filtering = queue_nodes.size(); filterAndSortQueueNodes(queue_nodes); - /// The following message is too verbose, but it can be useful too debug mysterious test failures in CI + /// The following message is too verbose, but it can be useful to debug mysterious test failures in CI LOG_TRACE(log, "scheduleTasks: initialized={}, size_before_filtering={}, queue_size={}, " "entries={}..{}, " "first_failed_task_name={}, current_tasks_size={}, " diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index c0d3cdfeb62..d3f523aafc9 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ 
b/src/Interpreters/MergeTreeTransaction.cpp @@ -158,7 +158,7 @@ void MergeTreeTransaction::addMutation(const StoragePtr & table, const String & bool MergeTreeTransaction::isReadOnly() const { std::lock_guard lock{mutex}; - assert((creating_parts.empty() && removing_parts.empty() && mutations.empty()) == storages.empty()); + chassert((creating_parts.empty() && removing_parts.empty() && mutations.empty()) == storages.empty()); return storages.empty(); } @@ -204,7 +204,7 @@ void MergeTreeTransaction::afterCommit(CSN assigned_csn) noexcept /// and we will be able to remove old entries from transaction log in ZK. /// It's not a problem if server crash before CSN is written, because we already have TID in data part and entry in the log. [[maybe_unused]] CSN prev_value = csn.exchange(assigned_csn); - assert(prev_value == Tx::CommittingCSN); + chassert(prev_value == Tx::CommittingCSN); for (const auto & part : creating_parts) { part->version.creation_csn.store(csn); @@ -321,7 +321,7 @@ String MergeTreeTransaction::dumpDescription() const { String info = fmt::format("{} (created by {}, {})", part->name, part->version.getCreationTID(), part->version.creation_csn); std::get<1>(storage_to_changes[&(part->storage)]).push_back(std::move(info)); - assert(!part->version.creation_csn || part->version.creation_csn <= snapshot); + chassert(!part->version.creation_csn || part->version.creation_csn <= snapshot); } for (const auto & mutation : mutations) diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index e65630d907b..3fe0920427e 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -145,24 +145,29 @@ void TransactionLog::loadEntries(Strings::const_iterator beg, Strings::const_ite NOEXCEPT_SCOPE; LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); - std::lock_guard lock{mutex}; - for (const auto & entry : loaded) { - if (entry.first == Tx::EmptyTID.getHash()) - continue; + 
std::lock_guard lock{mutex}; + for (const auto & entry : loaded) + { + if (entry.first == Tx::EmptyTID.getHash()) + continue; - tid_to_csn.emplace(entry.first, entry.second); + tid_to_csn.emplace(entry.first, entry.second); + } + last_loaded_entry = last_entry; + } + { + std::lock_guard lock{running_list_mutex}; + latest_snapshot = loaded.back().second.csn; + local_tid_counter = Tx::MaxReservedLocalTID; } - last_loaded_entry = last_entry; - latest_snapshot = loaded.back().second.csn; - local_tid_counter = Tx::MaxReservedLocalTID; } void TransactionLog::loadLogFromZooKeeper() { - assert(!zookeeper); - assert(tid_to_csn.empty()); - assert(last_loaded_entry.empty()); + chassert(!zookeeper); + chassert(tid_to_csn.empty()); + chassert(last_loaded_entry.empty()); zookeeper = global_context->getZooKeeper(); /// We do not write local_tid_counter to disk or zk and maintain it only in memory. @@ -172,7 +177,7 @@ void TransactionLog::loadLogFromZooKeeper() if (code != Coordination::Error::ZOK) { /// Log probably does not exist, create it - assert(code == Coordination::Error::ZNONODE); + chassert(code == Coordination::Error::ZNONODE); zookeeper->createAncestors(zookeeper_path_log); Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/tail_ptr", serializeCSN(Tx::MaxReservedCSN), zkutil::CreateMode::Persistent)); @@ -192,11 +197,11 @@ void TransactionLog::loadLogFromZooKeeper() /// 2. simplify log rotation /// 3. 
support 64-bit CSNs on top of Apache ZooKeeper (it uses Int32 for sequential numbers) Strings entries_list = zookeeper->getChildren(zookeeper_path_log, nullptr, log_updated_event); - assert(!entries_list.empty()); + chassert(!entries_list.empty()); std::sort(entries_list.begin(), entries_list.end()); loadEntries(entries_list.begin(), entries_list.end()); - assert(!last_loaded_entry.empty()); - assert(latest_snapshot == deserializeCSN(last_loaded_entry)); + chassert(!last_loaded_entry.empty()); + chassert(latest_snapshot == deserializeCSN(last_loaded_entry)); local_tid_counter = Tx::MaxReservedLocalTID; tail_ptr = deserializeCSN(zookeeper->get(zookeeper_path + "/tail_ptr")); @@ -241,12 +246,12 @@ void TransactionLog::runUpdatingThread() void TransactionLog::loadNewEntries() { Strings entries_list = zookeeper->getChildren(zookeeper_path_log, nullptr, log_updated_event); - assert(!entries_list.empty()); + chassert(!entries_list.empty()); std::sort(entries_list.begin(), entries_list.end()); auto it = std::upper_bound(entries_list.begin(), entries_list.end(), last_loaded_entry); loadEntries(it, entries_list.end()); - assert(last_loaded_entry == entries_list.back()); - assert(latest_snapshot == deserializeCSN(last_loaded_entry)); + chassert(last_loaded_entry == entries_list.back()); + chassert(latest_snapshot == deserializeCSN(last_loaded_entry)); latest_snapshot.notify_all(); } @@ -396,7 +401,7 @@ void TransactionLog::rollbackTransaction(const MergeTreeTransactionPtr & txn) no if (!txn->rollback()) { /// Transaction was cancelled concurrently, it's already rolled back. 
- assert(txn->csn == Tx::RolledBackCSN); + chassert(txn->csn == Tx::RolledBackCSN); return; } @@ -438,8 +443,8 @@ CSN TransactionLog::getCSN(const TIDHash & tid) CSN TransactionLog::getCSNImpl(const TIDHash & tid_hash) const { - assert(tid_hash); - assert(tid_hash != Tx::EmptyTID.getHash()); + chassert(tid_hash); + chassert(tid_hash != Tx::EmptyTID.getHash()); std::lock_guard lock{mutex}; auto it = tid_to_csn.find(tid_hash); @@ -467,6 +472,8 @@ CSN TransactionLog::getOldestSnapshot() const std::lock_guard lock{running_list_mutex}; if (snapshots_in_use.empty()) return getLatestSnapshot(); + chassert(running_list.size() == snapshots_in_use.size()); + chassert(snapshots_in_use.size() < 2 || snapshots_in_use.front() <= *++snapshots_in_use.begin()); return snapshots_in_use.front(); } diff --git a/src/Interpreters/TransactionVersionMetadata.cpp b/src/Interpreters/TransactionVersionMetadata.cpp index b965ade8d10..fd75faaf206 100644 --- a/src/Interpreters/TransactionVersionMetadata.cpp +++ b/src/Interpreters/TransactionVersionMetadata.cpp @@ -88,8 +88,8 @@ void VersionMetadata::lockRemovalTID(const TransactionID & tid, const Transactio bool VersionMetadata::tryLockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context, TIDHash * locked_by_id) { - assert(!tid.isEmpty()); - assert(!creation_tid.isEmpty()); + chassert(!tid.isEmpty()); + chassert(!creation_tid.isEmpty()); TIDHash removal_lock_value = tid.getHash(); TIDHash expected_removal_lock_value = 0; bool locked = removal_tid_lock.compare_exchange_strong(expected_removal_lock_value, removal_lock_value); @@ -115,7 +115,7 @@ bool VersionMetadata::tryLockRemovalTID(const TransactionID & tid, const Transac void VersionMetadata::unlockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context) { LOG_TEST(log, "Unlocking removal_tid by {}, table: {}, part: {}", tid, context.table.getNameForLogs(), context.part_name); - assert(!tid.isEmpty()); + chassert(!tid.isEmpty()); TIDHash 
removal_lock_value = tid.getHash(); TIDHash locked_by = removal_tid_lock.load(); @@ -145,7 +145,7 @@ bool VersionMetadata::isRemovalTIDLocked() const void VersionMetadata::setCreationTID(const TransactionID & tid, TransactionInfoContext * context) { /// NOTE ReplicatedMergeTreeSink may add one part multiple times - assert(creation_tid.isEmpty() || creation_tid == tid); + chassert(creation_tid.isEmpty() || creation_tid == tid); creation_tid = tid; if (context) tryWriteEventToSystemLog(log, TransactionsInfoLogElement::ADD_PART, tid, *context); @@ -158,7 +158,7 @@ bool VersionMetadata::isVisible(const MergeTreeTransaction & txn) bool VersionMetadata::isVisible(CSN snapshot_version, TransactionID current_tid) { - assert(!creation_tid.isEmpty()); + chassert(!creation_tid.isEmpty()); CSN creation = creation_csn.load(std::memory_order_relaxed); TIDHash removal_lock = removal_tid_lock.load(std::memory_order_relaxed); CSN removal = removal_csn.load(std::memory_order_relaxed); @@ -166,10 +166,10 @@ bool VersionMetadata::isVisible(CSN snapshot_version, TransactionID current_tid) [[maybe_unused]] bool had_creation_csn = creation; [[maybe_unused]] bool had_removal_tid = removal_lock; [[maybe_unused]] bool had_removal_csn = removal; - assert(!had_removal_csn || had_removal_tid); - assert(!had_removal_csn || had_creation_csn); - assert(creation == Tx::UnknownCSN || creation == Tx::PrehistoricCSN || Tx::MaxReservedCSN < creation); - assert(removal == Tx::UnknownCSN || removal == Tx::PrehistoricCSN || Tx::MaxReservedCSN < removal); + chassert(!had_removal_csn || had_removal_tid); + chassert(!had_removal_csn || had_creation_csn); + chassert(creation == Tx::UnknownCSN || creation == Tx::PrehistoricCSN || Tx::MaxReservedCSN < creation); + chassert(removal == Tx::UnknownCSN || removal == Tx::PrehistoricCSN || Tx::MaxReservedCSN < removal); /// Special snapshot for introspection purposes if (unlikely(snapshot_version == Tx::EverythingVisibleCSN)) @@ -204,8 +204,8 @@ bool 
VersionMetadata::isVisible(CSN snapshot_version, TransactionID current_tid) /// Data part has creation_tid/removal_tid, but does not have creation_csn/removal_csn. /// It means that some transaction is creating/removing the part right now or has done it recently /// and we don't know if it was already committed or not. - assert(!had_creation_csn || (had_removal_tid && !had_removal_csn)); - assert(current_tid.isEmpty() || (creation_tid != current_tid && removal_lock != current_tid.getHash())); + chassert(!had_creation_csn || (had_removal_tid && !had_removal_csn)); + chassert(current_tid.isEmpty() || (creation_tid != current_tid && removal_lock != current_tid.getHash())); /// Before doing CSN lookup, let's check some extra conditions. /// If snapshot_version <= some_tid.start_csn, then changes of the transaction with some_tid @@ -347,8 +347,8 @@ void VersionMetadata::write(WriteBuffer & buf) const if (removal_tid_lock) { - assert(!removal_tid.isEmpty()); - assert(removal_tid.getHash() == removal_tid_lock); + chassert(!removal_tid.isEmpty()); + chassert(removal_tid.getHash() == removal_tid_lock); writeRemovalTID(buf); writeCSN(buf, REMOVAL, /* internal */ true); } @@ -384,7 +384,7 @@ void VersionMetadata::read(ReadBuffer & buf) if (name == CREATION_CSN_STR) { - assert(!creation_csn); + chassert(!creation_csn); creation_csn = read_csn(); } else if (name == REMOVAL_TID_STR) @@ -398,7 +398,7 @@ void VersionMetadata::read(ReadBuffer & buf) { if (removal_tid.isEmpty()) throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Found removal_csn in metadata file, but removal_tid is {}", removal_tid); - assert(!removal_csn); + chassert(!removal_csn); removal_csn = read_csn(); } else diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 2c9dd2b4934..2ddca8dce26 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1282,12 +1282,12 @@ void 
IMergeTreeDataPart::storeVersionMetadata() const void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN which_csn) const { - assert(!version.creation_tid.isEmpty()); - assert(!(which_csn == VersionMetadata::WhichCSN::CREATION && version.creation_tid.isPrehistoric())); - assert(!(which_csn == VersionMetadata::WhichCSN::CREATION && version.creation_csn == 0)); - assert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && (version.removal_tid.isPrehistoric() || version.removal_tid.isEmpty()))); - assert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && version.removal_csn == 0)); - assert(isStoredOnDisk()); + chassert(!version.creation_tid.isEmpty()); + chassert(!(which_csn == VersionMetadata::WhichCSN::CREATION && version.creation_tid.isPrehistoric())); + chassert(!(which_csn == VersionMetadata::WhichCSN::CREATION && version.creation_csn == 0)); + chassert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && (version.removal_tid.isPrehistoric() || version.removal_tid.isEmpty()))); + chassert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && version.removal_csn == 0)); + chassert(isStoredOnDisk()); /// Small enough appends to file are usually atomic, /// so we append new metadata instead of rewriting file to reduce number of fsyncs. 
@@ -1303,10 +1303,10 @@ void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN wh void IMergeTreeDataPart::appendRemovalTIDToVersionMetadata(bool clear) const { - assert(!version.creation_tid.isEmpty()); - assert(version.removal_csn == 0); - assert(!version.removal_tid.isEmpty()); - assert(isStoredOnDisk()); + chassert(!version.creation_tid.isEmpty()); + chassert(version.removal_csn == 0); + chassert(!version.removal_tid.isEmpty()); + chassert(isStoredOnDisk()); if (version.creation_tid.isPrehistoric() && !clear) { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 50811daa4ab..d2c757f6750 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1364,7 +1364,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) /// Check if CSNs were witten after committing transaction, update and write if needed. bool version_updated = false; - assert(!version.creation_tid.isEmpty()); + chassert(!version.creation_tid.isEmpty()); if (!part->version.creation_csn) { auto min = TransactionLog::getCSN(version.creation_tid); diff --git a/tests/queries/0_stateless/01133_begin_commit_race.reference b/tests/queries/0_stateless/01133_begin_commit_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01133_begin_commit_race.sh b/tests/queries/0_stateless/01133_begin_commit_race.sh new file mode 100755 index 00000000000..29e7ef423a1 --- /dev/null +++ b/tests/queries/0_stateless/01133_begin_commit_race.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mt"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE mt (n Int64) ENGINE=MergeTree ORDER BY n SETTINGS old_parts_lifetime=0"; + + +function begin_commit_readonly() +{ + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + COMMIT;"; +} + +function begin_rollback_readonly() +{ + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + ROLLBACK;"; +} + +function begin_insert_commit() +{ + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO mt VALUES ($RANDOM); + COMMIT;"; +} + +function introspection() +{ + $CLICKHOUSE_CLIENT -q "SELECT * FROM system.transactions FORMAT Null" + $CLICKHOUSE_CLIENT -q "SELECT transactionLatestSnapshot(), transactionOldestSnapshot() FORMAT Null" +} + +export -f begin_commit_readonly +export -f begin_rollback_readonly +export -f begin_insert_commit +export -f introspection + +TIMEOUT=20 + +clickhouse_client_loop_timeout $TIMEOUT begin_commit_readonly & +clickhouse_client_loop_timeout $TIMEOUT begin_rollback_readonly & +clickhouse_client_loop_timeout $TIMEOUT begin_insert_commit & +clickhouse_client_loop_timeout $TIMEOUT introspection & + +wait + +$CLICKHOUSE_CLIENT --query "DROP TABLE mt"; From 5d19150906430531963f5e95346db3795eb499fd Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 10:47:28 +0000 Subject: [PATCH 319/615] Try to improve short circuit functions processing --- src/Interpreters/ExpressionActions.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 2ba3453110e..9c565225d29 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -160,8 +160,12 @@ static void setLazyExecutionInfo( const ActionsDAGReverseInfo::NodeInfo & node_info = reverse_info.nodes_info[reverse_info.reverse_index.at(node)]; /// If node is used in result or it doesn't 
have parents, we can't enable lazy execution. - if (node_info.used_in_result || node_info.parents.empty()) + if (node_info.used_in_result || node_info.parents.empty() || node->type != ActionsDAG::ActionType::FUNCTION + || node->type != ActionsDAG::ActionType::ALIAS) + { lazy_execution_info.can_be_lazy_executed = false; + return; + } /// To fill lazy execution info for current node we need to create it for all it's parents. for (const auto & parent : node_info.parents) @@ -172,7 +176,7 @@ static void setLazyExecutionInfo( { /// Use set, because one node can be more than one argument. /// Example: expr1 AND expr2 AND expr1. - std::set indexes; + std::unordered_set indexes; for (size_t i = 0; i != parent->children.size(); ++i) { if (node == parent->children[i]) From 063f11642e0c2fe94279a3f3a3f6d673cad23940 Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 20 May 2022 10:50:38 +0000 Subject: [PATCH 320/615] update test --- .../queries/0_stateless/01078_window_view_alter_query_watch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 93e92107f48..8314db8081e 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -43,7 +43,7 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' 
SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) From 020a868810009d8944ed728537cffc6a3c84f0d4 Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 20 May 2022 10:53:06 +0000 Subject: [PATCH 321/615] rename test --- ...w_view_select.reference => 01083_window_view_select.reference} | 0 .../{01076_window_view_select.sh => 01083_window_view_select.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{01076_window_view_select.reference => 01083_window_view_select.reference} (100%) rename tests/queries/0_stateless/{01076_window_view_select.sh => 01083_window_view_select.sh} (100%) diff --git a/tests/queries/0_stateless/01076_window_view_select.reference b/tests/queries/0_stateless/01083_window_view_select.reference similarity index 100% rename from tests/queries/0_stateless/01076_window_view_select.reference rename to tests/queries/0_stateless/01083_window_view_select.reference diff --git a/tests/queries/0_stateless/01076_window_view_select.sh b/tests/queries/0_stateless/01083_window_view_select.sh similarity index 100% rename from tests/queries/0_stateless/01076_window_view_select.sh rename to tests/queries/0_stateless/01083_window_view_select.sh From bb2fb48ad78c5a339f66d309c36dc9299f5082fc Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 10:53:20 +0000 Subject: [PATCH 322/615] Fix tests --- tests/queries/0_stateless/00948_values_interpreter_template.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00948_values_interpreter_template.sql b/tests/queries/0_stateless/00948_values_interpreter_template.sql index 2404c99f736..a3d2ffd7452 100644 --- a/tests/queries/0_stateless/00948_values_interpreter_template.sql +++ b/tests/queries/0_stateless/00948_values_interpreter_template.sql @@ -21,7 +21,7 @@ INSERT INTO type_names VALUES (1, toTypeName([1, 2]), toTypeName((256, -1, 3.14, -- _NUM_5: Float64 -> Int64 INSERT INTO values_template VALUES ((1), lower(replaceAll('Hella', 'a', 
'o')), 1 + 2 + 3, round(-4 * 5.0), nan / CAST('42', 'Int8'), reverse([1, 2, 3])), ((2), lower(replaceAll('Warld', 'a', 'o')), -4 + 5 + 6, round(18446744073709551615 * 1e-19), 1.0 / CAST('0', 'Int8'), reverse([])), ((3), lower(replaceAll('Test', 'a', 'o')), 3 + 2 + 1, round(9223372036854775807 * -1), 6.28 / CAST('2', 'Int8'), reverse([4, 5])), ((4), lower(replaceAll('Expressians', 'a', 'o')), 6 + 5 + 4, round(1 * -9223372036854775807), 127.0 / CAST('127', 'Int8'), reverse([6, 7, 8, 9, 0])); -INSERT INTO values_template_nullable VALUES ((1), lower(replaceAll('Hella', 'a', 'o')), 1 + 2 + 3, arraySort(x -> assumeNotNull(x), [null, NULL])), ((2), lower(replaceAll('Warld', 'b', 'o')), 4 - 5 + 6, arraySort(x -> assumeNotNull(x), [+1, -1, Null])), ((3), lower(replaceAll('Test', 'c', 'o')), 3 + 2 - 1, arraySort(x -> assumeNotNull(x), [1, nUlL, 3.14])), ((4), lower(replaceAll(null, 'c', 'o')), 6 + 5 - null, arraySort(x -> assumeNotNull(x), [3, 2, 1])); +INSERT INTO values_template_nullable VALUES ((1), lower(replaceAll('Hella', 'a', 'o')), 1 + 2 + 3, arraySort(x -> assumeNotNull(x), [null, NULL::Nullable(UInt8)])), ((2), lower(replaceAll('Warld', 'b', 'o')), 4 - 5 + 6, arraySort(x -> assumeNotNull(x), [+1, -1, Null])), ((3), lower(replaceAll('Test', 'c', 'o')), 3 + 2 - 1, arraySort(x -> assumeNotNull(x), [1, nUlL, 3.14])), ((4), lower(replaceAll(null, 'c', 'o')), 6 + 5 - null, arraySort(x -> assumeNotNull(x), [3, 2, 1])); INSERT INTO values_template_fallback VALUES (1 + x); -- { clientError 62 } INSERT INTO values_template_fallback VALUES (abs(functionThatDoesNotExists(42))); -- { clientError 46 } From 23a85d34069bebe4151cc1005d0c63fa15b5ccb3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 19 May 2022 18:47:26 +0000 Subject: [PATCH 323/615] Perserve constness in ExpressionActionsChain::JoinStep --- src/Interpreters/ExpressionActions.cpp | 9 ++---- src/Interpreters/ExpressionActions.h | 2 +- src/Interpreters/TableJoin.cpp | 29 ++++++++++++++++++- src/Interpreters/TableJoin.h 
| 4 +++ .../02302_clash_const_aggegate_join.reference | 6 ++++ .../02302_clash_const_aggegate_join.sql | 29 ++++++++++++++----- 6 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 2ba3453110e..f0168053442 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1034,7 +1034,7 @@ void ExpressionActionsChain::ArrayJoinStep::finalize(const NameSet & required_ou ExpressionActionsChain::JoinStep::JoinStep( std::shared_ptr analyzed_join_, JoinPtr join_, - ColumnsWithTypeAndName required_columns_) + const ColumnsWithTypeAndName & required_columns_) : Step({}) , analyzed_join(std::move(analyzed_join_)) , join(std::move(join_)) @@ -1042,11 +1042,8 @@ ExpressionActionsChain::JoinStep::JoinStep( for (const auto & column : required_columns_) required_columns.emplace_back(column.name, column.type); - NamesAndTypesList result_names_and_types = required_columns; - analyzed_join->addJoinedColumnsAndCorrectTypes(result_names_and_types, true); - for (const auto & [name, type] : result_names_and_types) - /// `column` is `nullptr` because we don't care on constness here, it may be changed in join - result_columns.emplace_back(nullptr, type, name); + result_columns = required_columns_; + analyzed_join->addJoinedColumnsAndCorrectTypes(result_columns, true); } void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index c942f33b6df..332ae941bba 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -233,7 +233,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList required_columns; ColumnsWithTypeAndName result_columns; - JoinStep(std::shared_ptr analyzed_join_, JoinPtr join_, ColumnsWithTypeAndName required_columns_); + JoinStep(std::shared_ptr analyzed_join_, JoinPtr join_, const 
ColumnsWithTypeAndName & required_columns_); NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 69e60e3eef7..10a27b9efc5 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -328,6 +329,21 @@ NamesAndTypesList TableJoin::correctedColumnsAddedByJoin() const void TableJoin::addJoinedColumnsAndCorrectTypes(NamesAndTypesList & left_columns, bool correct_nullability) { + addJoinedColumnsAndCorrectTypesImpl(left_columns, correct_nullability); +} + +void TableJoin::addJoinedColumnsAndCorrectTypes(ColumnsWithTypeAndName & left_columns, bool correct_nullability) +{ + addJoinedColumnsAndCorrectTypesImpl(left_columns, correct_nullability); +} + +template +void TableJoin::addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, bool correct_nullability) +{ + static_assert(std::is_same_v || + std::is_same_v); + + constexpr bool has_column = std::is_same_v; for (auto & col : left_columns) { if (hasUsing()) @@ -342,15 +358,26 @@ void TableJoin::addJoinedColumnsAndCorrectTypes(NamesAndTypesList & left_columns inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage()); if (auto it = left_type_map.find(col.name); it != left_type_map.end()) + { col.type = it->second; + if constexpr (has_column) + col.column = nullptr; + } } if (correct_nullability && leftBecomeNullable(col.type)) + { col.type = JoinCommon::convertTypeToNullable(col.type); + if constexpr (has_column) + col.column = nullptr; + } } for (const auto & col : correctedColumnsAddedByJoin()) - left_columns.emplace_back(col.name, col.type); + if constexpr (has_column) + left_columns.emplace_back(nullptr, col.type, col.name); + else + 
left_columns.emplace_back(col.name, col.type); } bool TableJoin::sameStrictnessAndKind(ASTTableJoin::Strictness strictness_, ASTTableJoin::Kind kind_) const diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index c7bd80ff2b7..37e9417bde7 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -254,7 +254,11 @@ public: bool rightBecomeNullable(const DataTypePtr & column_type) const; void addJoinedColumn(const NameAndTypePair & joined_column); + template + void addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, bool correct_nullability); + void addJoinedColumnsAndCorrectTypes(NamesAndTypesList & left_columns, bool correct_nullability); + void addJoinedColumnsAndCorrectTypes(ColumnsWithTypeAndName & left_columns, bool correct_nullability); /// Calculate converting actions, rename key columns in required /// For `USING` join we will convert key columns inplace and affect into types in the result table diff --git a/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference b/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference index d00491fd7e5..bfa283c3478 100644 --- a/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference +++ b/tests/queries/0_stateless/02302_clash_const_aggegate_join.reference @@ -1 +1,7 @@ +0 +1970-01-01 00:00:00 +0 +2020-01-01 00:00:00 + + 1 diff --git a/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql b/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql index 979a19340d0..32c602e0d36 100644 --- a/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql +++ b/tests/queries/0_stateless/02302_clash_const_aggegate_join.sql @@ -1,15 +1,28 @@ DROP TABLE IF EXISTS e; - -- https://github.com/ClickHouse/ClickHouse/issues/36891 CREATE TABLE e ( a UInt64, t DateTime ) ENGINE = MergeTree PARTITION BY toDate(t) ORDER BY tuple(); - INSERT INTO e SELECT 1, toDateTime('2020-02-01 12:00:01') + INTERVAL number MONTH FROM numbers(10); -SELECT 
any('1') -FROM e JOIN ( SELECT 1 :: UInt32 AS key) AS da ON key = a -PREWHERE toString(a) = '1'; +SELECT sumIf( 1, if( 1, toDateTime('2020-01-01 00:00:00', 'UTC'), toDateTime('1970-01-01 00:00:00', 'UTC')) > t ) +FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a +WHERE t >= toDateTime('2021-07-19T13:00:00', 'UTC') AND t <= toDateTime('2021-07-19T13:59:59', 'UTC'); + +SELECT any( toDateTime('2020-01-01T00:00:00', 'UTC')) +FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a +PREWHERE t >= toDateTime('2021-07-19T13:00:00', 'UTC'); + +SELECT sumIf( 1, if( 1, toDateTime('2020-01-01 00:00:00', 'UTC'), toDateTime('1970-01-01 00:00:00', 'UTC')) > t ) +FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a +WHERE t >= toDateTime('2020-01-01 00:00:00', 'UTC') AND t <= toDateTime('2021-07-19T13:59:59', 'UTC'); + +SELECT any(toDateTime('2020-01-01 00:00:00')) +FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a +PREWHERE t >= toDateTime('2020-01-01 00:00:00'); + +SELECT any('2020-01-01 00:00:00') FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a PREWHERE t = '2020-01-01 00:00:00'; + +SELECT any('x') FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a PREWHERE toString(a) = 'x'; + +SELECT any('1') FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a PREWHERE toString(a) = '1'; --- SELECT sumIf( 1, if( 1, toDateTime('2020-01-01 00:00:00', 'UTC'), toDateTime('1970-01-01 00:00:00', 'UTC')) > t ) --- FROM e JOIN ( SELECT 1 joinKey) AS da ON joinKey = a --- WHERE t >= toDateTime('2021-07-19T13:00:00', 'UTC') AND t <= toDateTime('2021-07-19T13:59:59', 'UTC'); From 339980469fed6bb81bf9c44f804da28791a2f551 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 19 May 2022 18:57:33 +0000 Subject: [PATCH 324/615] Revert "Swap order of converting_join_columns and before_join steps" This reverts commit 2995b69f4a954910499248d217d387c2c97eaa5c. 
--- src/Interpreters/ExpressionAnalyzer.cpp | 11 +++++------ src/Interpreters/ExpressionAnalyzer.h | 4 ++-- src/Interpreters/InterpreterSelectQuery.cpp | 18 +++++++++--------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 14bb8f8e8c2..0b1154f6fd1 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -976,16 +976,16 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi return array_join; } -void SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types) { ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()); + return true; } -JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns, ActionsDAGPtr & before_join) +JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns) { - const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); JoinPtr table_join = makeTableJoin(*syntax->ast_join, left_sample_columns, converting_join_columns); @@ -995,8 +995,6 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain chain.addStep(); } - before_join = chain.getLastActions(); - ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); chain.steps.push_back(std::make_unique( syntax->analyzed_join, table_join, step.getResultColumns())); @@ -1764,7 +1762,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (query_analyzer.hasTableJoin()) { query_analyzer.appendJoinLeftKeys(chain, only_types || !first_stage); - join = 
query_analyzer.appendJoin(chain, converting_join_columns, before_join); + before_join = chain.getLastActions(); + join = query_analyzer.appendJoin(chain, converting_join_columns); chain.addStep(); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ef97fbf175a..85efb3829d0 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -377,8 +377,8 @@ private: /// Before aggregation: ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); - void appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); - JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns, ActionsDAGPtr & before_join); + bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); + JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cfb64366e9b..6bfadc66352 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1251,6 +1251,15 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( + query_plan.getCurrentDataStream(), + expressions.before_join); + before_join_step->setStepDescription("Before JOIN"); + query_plan.addStep(std::move(before_join_step)); + } + /// Optional step to convert key columns to common supertype. if (expressions.converting_join_columns) { @@ -1261,15 +1270,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( - query_plan.getCurrentDataStream(), - expressions.before_join); - before_join_step->setStepDescription("Before JOIN"); - query_plan.addStep(std::move(before_join_step)); - } - if (expressions.hasJoin()) { if (expressions.join->isFilled()) From d2304f5d152f93bcf95c18b4dd0286a666c72614 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 12:07:29 +0000 Subject: [PATCH 325/615] Make better --- docs/en/interfaces/formats.md | 5 +-- .../Impl/JSONColumnsBlockInputFormat.cpp | 6 ++-- .../Impl/JSONColumnsBlockInputFormat.h | 4 +-- ...pp => JSONColumnsBlockInputFormatBase.cpp} | 32 +++++++++---------- ...at.h => JSONColumnsBlockInputFormatBase.h} | 32 +++++++++---------- .../Impl/JSONColumnsBlockOutputFormat.cpp | 2 +- .../Impl/JSONColumnsBlockOutputFormat.h | 4 +-- ...p => JSONColumnsBlockOutputFormatBase.cpp} | 14 ++++---- ...t.h => JSONColumnsBlockOutputFormatBase.h} | 8 ++--- ...ONColumnsWithMetadataBlockOutputFormat.cpp | 2 +- .../JSONCompactColumnsBlockInputFormat.cpp | 6 ++-- .../Impl/JSONCompactColumnsBlockInputFormat.h | 4 +-- .../JSONCompactColumnsBlockOutputFormat.cpp | 2 +- .../JSONCompactColumnsBlockOutputFormat.h | 6 ++-- 14 files changed, 64 insertions(+), 63 deletions(-) rename src/Processors/Formats/Impl/{JSONColumnsBaseBlockInputFormat.cpp => JSONColumnsBlockInputFormatBase.cpp} (89%) rename src/Processors/Formats/Impl/{JSONColumnsBaseBlockInputFormat.h => JSONColumnsBlockInputFormatBase.h} (69%) rename src/Processors/Formats/Impl/{JSONColumnsBaseBlockOutputFormat.cpp => JSONColumnsBlockOutputFormatBase.cpp} (72%) rename src/Processors/Formats/Impl/{JSONColumnsBaseBlockOutputFormat.h => JSONColumnsBlockOutputFormatBase.h} (77%) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index c248c6644cb..31f948cbb00 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -403,7 +403,7 @@ Both data output and parsing are supported in this format. 
For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -For input format columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. ## CSV {#csv} @@ -599,7 +599,7 @@ Example: } ``` -Columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) @@ -621,6 +621,7 @@ Example: "name": "str", "type": "String" }, + { "name": "arr", "type": "Array(UInt8)" diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp index 2f924be2c14..935462a6fe4 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp @@ -5,7 +5,7 @@ namespace DB { -JSONColumnsReader::JSONColumnsReader(ReadBuffer & in_) : JSONColumnsBaseReader(in_) +JSONColumnsReader::JSONColumnsReader(ReadBuffer & in_) : JSONColumnsReaderBase(in_) { } @@ -51,7 +51,7 @@ void registerInputFormatJSONColumns(FormatFactory & factory) const RowInputFormatParams &, const FormatSettings & settings) { - return std::make_shared(buf, sample, 
settings, std::make_unique(buf)); + return std::make_shared(buf, sample, settings, std::make_unique(buf)); } ); } @@ -62,7 +62,7 @@ void registerJSONColumnsSchemaReader(FormatFactory & factory) "JSONColumns", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings, std::make_unique(buf)); + return std::make_shared(buf, settings, std::make_unique(buf)); } ); } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h index 382da947c03..f8b8a80731e 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { @@ -12,7 +12,7 @@ namespace DB * ... * } */ -class JSONColumnsReader : public JSONColumnsBaseReader +class JSONColumnsReader : public JSONColumnsReaderBase { public: JSONColumnsReader(ReadBuffer & in_); diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp similarity index 89% rename from src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp rename to src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 0e4e74e14c4..cdde87f2cf6 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,11 +13,11 @@ namespace ErrorCodes } -JSONColumnsBaseReader::JSONColumnsBaseReader(ReadBuffer & in_) : in(&in_) +JSONColumnsReaderBase::JSONColumnsReaderBase(ReadBuffer & in_) : in(&in_) { } -bool JSONColumnsBaseReader::checkColumnEnd() +bool JSONColumnsReaderBase::checkColumnEnd() { skipWhitespaceIfAny(*in); if (!in->eof() && *in->position() == ']') @@ -29,7 +29,7 @@ bool JSONColumnsBaseReader::checkColumnEnd() return false; } -bool 
JSONColumnsBaseReader::checkColumnEndOrSkipFieldDelimiter() +bool JSONColumnsReaderBase::checkColumnEndOrSkipFieldDelimiter() { if (checkColumnEnd()) return true; @@ -39,7 +39,7 @@ bool JSONColumnsBaseReader::checkColumnEndOrSkipFieldDelimiter() return false; } -bool JSONColumnsBaseReader::checkChunkEndOrSkipColumnDelimiter() +bool JSONColumnsReaderBase::checkChunkEndOrSkipColumnDelimiter() { if (checkChunkEnd()) return true; @@ -49,7 +49,7 @@ bool JSONColumnsBaseReader::checkChunkEndOrSkipColumnDelimiter() return false; } -void JSONColumnsBaseReader::skipColumn() +void JSONColumnsReaderBase::skipColumn() { /// We assume that we already read '[', so we should skip until matched ']'. size_t balance = 1; @@ -76,8 +76,8 @@ void JSONColumnsBaseReader::skipColumn() } } -JSONColumnsBaseBlockInputFormat::JSONColumnsBaseBlockInputFormat( - ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_) +JSONColumnsBlockInputFormatBase::JSONColumnsBlockInputFormatBase( + ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_) : IInputFormat(header_, in_) , format_settings(format_settings_) , fields(header_.getNamesAndTypes()) @@ -87,7 +87,7 @@ JSONColumnsBaseBlockInputFormat::JSONColumnsBaseBlockInputFormat( { } -size_t JSONColumnsBaseBlockInputFormat::readColumn( +size_t JSONColumnsBlockInputFormatBase::readColumn( IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name) { /// Check for empty column. 
@@ -103,13 +103,13 @@ size_t JSONColumnsBaseBlockInputFormat::readColumn( return column.size(); } -void JSONColumnsBaseBlockInputFormat::setReadBuffer(ReadBuffer & in_) +void JSONColumnsBlockInputFormatBase::setReadBuffer(ReadBuffer & in_) { reader->setReadBuffer(in_); IInputFormat::setReadBuffer(in_); } -Chunk JSONColumnsBaseBlockInputFormat::generate() +Chunk JSONColumnsBlockInputFormatBase::generate() { MutableColumns columns = getPort().getHeader().cloneEmptyColumns(); block_missing_values.clear(); @@ -175,13 +175,13 @@ Chunk JSONColumnsBaseBlockInputFormat::generate() return Chunk(std::move(columns), rows); } -JSONColumnsBaseSchemaReader::JSONColumnsBaseSchemaReader( - ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_) +JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( + ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_) : ISchemaReader(in_), format_settings(format_settings_), reader(std::move(reader_)) { } -void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const +void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const { auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second) { @@ -190,7 +190,7 @@ void JSONColumnsBaseSchemaReader::chooseResulType(DataTypePtr & type, const Data chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row); } -NamesAndTypesList JSONColumnsBaseSchemaReader::readSchema() +NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() { size_t total_rows_read = 0; std::unordered_map names_to_types; @@ -242,7 +242,7 @@ NamesAndTypesList JSONColumnsBaseSchemaReader::readSchema() return result; } -DataTypePtr JSONColumnsBaseSchemaReader::readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read) 
+DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read) { /// Check for empty column. if (reader->checkColumnEnd()) diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h similarity index 69% rename from src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h rename to src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index e912ec4c08e..ac746a2e2d1 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -12,12 +12,12 @@ class ReadBuffer; /// Base class for reading data in Columnar JSON formats. -class JSONColumnsBaseReader +class JSONColumnsReaderBase { public: - JSONColumnsBaseReader(ReadBuffer & in_); + JSONColumnsReaderBase(ReadBuffer & in_); - virtual ~JSONColumnsBaseReader() = default; + virtual ~JSONColumnsReaderBase() = default; void setReadBuffer(ReadBuffer & in_) { in = &in_; } @@ -38,15 +38,15 @@ protected: /// Base class for Columnar JSON input formats. It works with data using -/// JSONColumnsBaseReader interface. -/// To implement new Columnar JSON format you need to implement new JSONColumnsBaseReader -/// interface and provide it to JSONColumnsBaseBlockInputFormat. -class JSONColumnsBaseBlockInputFormat : public IInputFormat +/// JSONColumnsReaderBase interface. +/// To implement new Columnar JSON format you need to implement new JSONColumnsReaderBase +/// interface and provide it to JSONColumnsBlockInputFormatBase. 
+class JSONColumnsBlockInputFormatBase : public IInputFormat { public: - JSONColumnsBaseBlockInputFormat(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_); + JSONColumnsBlockInputFormatBase(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_); - String getName() const override { return "JSONColumnsBaseBlockInputFormat"; } + String getName() const override { return "JSONColumnsBlockInputFormatBase"; } void setReadBuffer(ReadBuffer & in_) override; @@ -62,19 +62,19 @@ protected: /// Maps column names and their positions in header. std::unordered_map name_to_index; Serializations serializations; - std::unique_ptr reader; + std::unique_ptr reader; BlockMissingValues block_missing_values; }; /// Base class for schema inference from Columnar JSON input formats. It works with data using -/// JSONColumnsBaseReader interface. -/// To implement schema reader for the new Columnar JSON format you need to implement new JSONColumnsBaseReader -/// interface and provide it to JSONColumnsBaseSchemaReader. -class JSONColumnsBaseSchemaReader : public ISchemaReader +/// JSONColumnsReaderBase interface. +/// To implement schema reader for the new Columnar JSON format you need to implement new JSONColumnsReaderBase +/// interface and provide it to JSONColumnsSchemaReaderBase. 
+class JSONColumnsSchemaReaderBase : public ISchemaReader { public: - JSONColumnsBaseSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_); + JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_); private: NamesAndTypesList readSchema() override; @@ -86,7 +86,7 @@ private: void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const; const FormatSettings format_settings; - std::unique_ptr reader; + std::unique_ptr reader; }; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp index bf8c50b923d..dd8688c655e 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -8,7 +8,7 @@ namespace DB { JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_) - : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_), fields(header_.getNamesAndTypes()), indent(indent_) + : JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_), fields(header_.getNamesAndTypes()), indent(indent_) { for (auto & field : fields) { diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h index afac6d9f223..e52f5f61aec 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { @@ -12,7 +12,7 @@ namespace DB * ... 
* } */ -class JSONColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat +class JSONColumnsBlockOutputFormat : public JSONColumnsBlockOutputFormatBase { public: JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_ = 0); diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp similarity index 72% rename from src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp rename to src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp index 832f65e4463..8e83282408b 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -6,7 +6,7 @@ namespace DB { -JSONColumnsBaseBlockOutputFormat::JSONColumnsBaseBlockOutputFormat( +JSONColumnsBlockOutputFormatBase::JSONColumnsBlockOutputFormatBase( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_) , format_settings(format_settings_) @@ -15,7 +15,7 @@ JSONColumnsBaseBlockOutputFormat::JSONColumnsBaseBlockOutputFormat( { } -void JSONColumnsBaseBlockOutputFormat::consume(Chunk chunk) +void JSONColumnsBlockOutputFormatBase::consume(Chunk chunk) { if (!mono_chunk) { @@ -26,14 +26,14 @@ void JSONColumnsBaseBlockOutputFormat::consume(Chunk chunk) mono_chunk.append(chunk); } -void JSONColumnsBaseBlockOutputFormat::writeSuffix() +void JSONColumnsBlockOutputFormatBase::writeSuffix() { writeChunk(mono_chunk); mono_chunk.clear(); } -void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk) +void JSONColumnsBlockOutputFormatBase::writeChunk(Chunk & chunk) { writeChunkStart(); const auto & columns = chunk.getColumns(); @@ -46,14 +46,14 @@ void JSONColumnsBaseBlockOutputFormat::writeChunk(Chunk & chunk) writeChunkEnd(); } -void 
JSONColumnsBaseBlockOutputFormat::writeColumnEnd(bool is_last) +void JSONColumnsBlockOutputFormatBase::writeColumnEnd(bool is_last) { JSONUtils::writeCompactArrayEnd(*ostr); if (!is_last) JSONUtils::writeFieldDelimiter(*ostr); } -void JSONColumnsBaseBlockOutputFormat::writeColumn(const IColumn & column, const ISerialization & serialization) +void JSONColumnsBlockOutputFormatBase::writeColumn(const IColumn & column, const ISerialization & serialization) { for (size_t i = 0; i != column.size(); ++i) { diff --git a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h similarity index 77% rename from src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h rename to src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h index c7bb0edb207..133979523f9 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBaseBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h @@ -12,12 +12,12 @@ class WriteBuffer; /// Base class for Columnar JSON output formats. /// It buffers all data and outputs it as a single block in writeSuffix() method. 
-class JSONColumnsBaseBlockOutputFormat : public IOutputFormat +class JSONColumnsBlockOutputFormatBase : public IOutputFormat { public: - JSONColumnsBaseBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + JSONColumnsBlockOutputFormatBase(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); - String getName() const override { return "JSONColumnsBaseBlockOutputFormat"; } + String getName() const override { return "JSONColumnsBlockOutputFormatBase"; } protected: void consume(Chunk chunk) override; @@ -32,7 +32,7 @@ protected: void writeColumnEnd(bool is_last); const FormatSettings format_settings; - Serializations serializations; + const Serializations serializations; WriteBuffer * ostr; diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index 1887a10e9f7..394385e548d 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -34,7 +34,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::writePrefix() void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() { rows = mono_chunk.getNumRows(); - JSONColumnsBaseBlockOutputFormat::writeSuffix(); + JSONColumnsBlockOutputFormatBase::writeSuffix(); } void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart() diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp index 15db9979e45..5b26ee2677b 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp @@ -5,7 +5,7 @@ namespace DB { -JSONCompactColumnsReader::JSONCompactColumnsReader(ReadBuffer & in_) : JSONColumnsBaseReader(in_) 
+JSONCompactColumnsReader::JSONCompactColumnsReader(ReadBuffer & in_) : JSONColumnsReaderBase(in_) { } @@ -46,7 +46,7 @@ void registerInputFormatJSONCompactColumns(FormatFactory & factory) const RowInputFormatParams &, const FormatSettings & settings) { - return std::make_shared(buf, sample, settings, std::make_unique(buf)); + return std::make_shared(buf, sample, settings, std::make_unique(buf)); } ); } @@ -57,7 +57,7 @@ void registerJSONCompactColumnsSchemaReader(FormatFactory & factory) "JSONCompactColumns", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings, std::make_unique(buf)); + return std::make_shared(buf, settings, std::make_unique(buf)); } ); } diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h index ab3824a3f9b..7f23e127ab4 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { @@ -12,7 +12,7 @@ namespace DB * ... 
* ] */ -class JSONCompactColumnsReader : public JSONColumnsBaseReader +class JSONCompactColumnsReader : public JSONColumnsReaderBase { public: JSONCompactColumnsReader(ReadBuffer & in_); diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp index 6f240d5e922..757345cbbe0 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -7,7 +7,7 @@ namespace DB { JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : JSONColumnsBaseBlockOutputFormat(out_, header_, format_settings_), column_names(header_.getNames()) + : JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_), column_names(header_.getNames()) { } diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h index a1f2079d297..49612ed67f6 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include +#include namespace DB { @@ -11,7 +11,7 @@ namespace DB * ... 
* ] */ -class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBaseBlockOutputFormat +class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBlockOutputFormatBase { public: JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); @@ -24,7 +24,7 @@ protected: void writeColumnStart(size_t column_index) override; - Names column_names; + const Names column_names; }; } From 44726122bbf01dedb1bdc76f059c9ecc5071157d Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 12:09:51 +0000 Subject: [PATCH 326/615] Join JSON registration --- src/Formats/registerFormats.cpp | 41 +++++++++++++++++---------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 9d0bf663715..8493c84173d 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -38,6 +38,10 @@ void registerInputFormatJSONEachRow(FormatFactory & factory); void registerOutputFormatJSONEachRow(FormatFactory & factory); void registerInputFormatJSONCompactEachRow(FormatFactory & factory); void registerOutputFormatJSONCompactEachRow(FormatFactory & factory); +void registerInputFormatJSONColumns(FormatFactory & factory); +void registerOutputFormatJSONColumns(FormatFactory & factory); +void registerInputFormatJSONCompactColumns(FormatFactory & factory); +void registerOutputFormatJSONCompactColumns(FormatFactory & factory); void registerInputFormatProtobuf(FormatFactory & factory); void registerOutputFormatProtobuf(FormatFactory & factory); void registerInputFormatProtobufList(FormatFactory & factory); @@ -60,10 +64,6 @@ void registerInputFormatCustomSeparated(FormatFactory & factory); void registerOutputFormatCustomSeparated(FormatFactory & factory); void registerInputFormatCapnProto(FormatFactory & factory); void registerOutputFormatCapnProto(FormatFactory & factory); -void registerInputFormatJSONColumns(FormatFactory & 
factory); -void registerOutputFormatJSONColumns(FormatFactory & factory); -void registerInputFormatJSONCompactColumns(FormatFactory & factory); -void registerOutputFormatJSONCompactColumns(FormatFactory & factory); /// Output only (presentational) formats. @@ -74,6 +74,7 @@ void registerOutputFormatVertical(FormatFactory & factory); void registerOutputFormatJSON(FormatFactory & factory); void registerOutputFormatJSONCompact(FormatFactory & factory); void registerOutputFormatJSONEachRowWithProgress(FormatFactory & factory); +void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory); void registerOutputFormatXML(FormatFactory & factory); void registerOutputFormatODBCDriver2(FormatFactory & factory); void registerOutputFormatNull(FormatFactory & factory); @@ -81,7 +82,6 @@ void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); void registerOutputFormatPrometheus(FormatFactory & factory); -void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory); /// Input only formats. 
@@ -107,14 +107,16 @@ void registerTSVSchemaReader(FormatFactory & factory); void registerCSVSchemaReader(FormatFactory & factory); void registerJSONCompactEachRowSchemaReader(FormatFactory & factory); void registerJSONEachRowSchemaReader(FormatFactory & factory); +void registerJSONAsStringSchemaReader(FormatFactory & factory); +void registerJSONAsObjectSchemaReader(FormatFactory & factory); +void registerJSONColumnsSchemaReader(FormatFactory & factory); +void registerJSONCompactColumnsSchemaReader(FormatFactory & factory); void registerNativeSchemaReader(FormatFactory & factory); void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory); void registerAvroSchemaReader(FormatFactory & factory); void registerProtobufSchemaReader(FormatFactory & factory); void registerProtobufListSchemaReader(FormatFactory & factory); void registerLineAsStringSchemaReader(FormatFactory & factory); -void registerJSONAsStringSchemaReader(FormatFactory & factory); -void registerJSONAsObjectSchemaReader(FormatFactory & factory); void registerRawBLOBSchemaReader(FormatFactory & factory); void registerMsgPackSchemaReader(FormatFactory & factory); void registerCapnProtoSchemaReader(FormatFactory & factory); @@ -124,8 +126,7 @@ void registerTSKVSchemaReader(FormatFactory & factory); void registerValuesSchemaReader(FormatFactory & factory); void registerTemplateSchemaReader(FormatFactory & factory); void registerMySQLSchemaReader(FormatFactory & factory); -void registerJSONColumnsSchemaReader(FormatFactory & factory); -void registerJSONCompactColumnsSchemaReader(FormatFactory & factory); + void registerFileExtensions(FormatFactory & factory); @@ -135,8 +136,8 @@ void registerFormats() registerFileSegmentationEngineTabSeparated(factory); registerFileSegmentationEngineCSV(factory); - registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineRegexp(factory); + registerFileSegmentationEngineJSONEachRow(factory); 
registerFileSegmentationEngineJSONAsString(factory); registerFileSegmentationEngineJSONAsObject(factory); registerFileSegmentationEngineJSONCompactEachRow(factory); @@ -162,6 +163,10 @@ void registerFormats() registerOutputFormatJSONEachRow(factory); registerInputFormatJSONCompactEachRow(factory); registerOutputFormatJSONCompactEachRow(factory); + registerInputFormatJSONColumns(factory); + registerOutputFormatJSONColumns(factory); + registerInputFormatJSONCompactColumns(factory); + registerOutputFormatJSONCompactColumns(factory); registerInputFormatProtobuf(factory); registerOutputFormatProtobufList(factory); registerInputFormatProtobufList(factory); @@ -183,10 +188,6 @@ void registerFormats() registerOutputFormatAvro(factory); registerInputFormatArrow(factory); registerOutputFormatArrow(factory); - registerInputFormatJSONColumns(factory); - registerOutputFormatJSONColumns(factory); - registerInputFormatJSONCompactColumns(factory); - registerOutputFormatJSONCompactColumns(factory); registerOutputFormatPretty(factory); registerOutputFormatPrettyCompact(factory); @@ -195,6 +196,7 @@ void registerFormats() registerOutputFormatJSON(factory); registerOutputFormatJSONCompact(factory); registerOutputFormatJSONEachRowWithProgress(factory); + registerOutputFormatJSONColumnsWithMetadata(factory); registerOutputFormatXML(factory); registerOutputFormatODBCDriver2(factory); registerOutputFormatNull(factory); @@ -203,12 +205,11 @@ void registerFormats() registerOutputFormatPostgreSQLWire(factory); registerOutputFormatCapnProto(factory); registerOutputFormatPrometheus(factory); - registerOutputFormatJSONColumnsWithMetadata(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); - registerInputFormatLineAsString(factory); registerInputFormatJSONAsObject(factory); + registerInputFormatLineAsString(factory); #if USE_HIVE registerInputFormatHiveText(factory); #endif @@ -227,14 +228,16 @@ void registerFormats() registerCSVSchemaReader(factory); 
registerJSONCompactEachRowSchemaReader(factory); registerJSONEachRowSchemaReader(factory); + registerJSONAsStringSchemaReader(factory); + registerJSONAsObjectSchemaReader(factory); + registerJSONColumnsSchemaReader(factory); + registerJSONCompactColumnsSchemaReader(factory); registerNativeSchemaReader(factory); registerRowBinaryWithNamesAndTypesSchemaReader(factory); registerAvroSchemaReader(factory); registerProtobufSchemaReader(factory); registerProtobufListSchemaReader(factory); registerLineAsStringSchemaReader(factory); - registerJSONAsStringSchemaReader(factory); - registerJSONAsObjectSchemaReader(factory); registerRawBLOBSchemaReader(factory); registerMsgPackSchemaReader(factory); registerCapnProtoSchemaReader(factory); @@ -244,8 +247,6 @@ void registerFormats() registerValuesSchemaReader(factory); registerTemplateSchemaReader(factory); registerMySQLSchemaReader(factory); - registerJSONColumnsSchemaReader(factory); - registerJSONCompactColumnsSchemaReader(factory); } } From 962b72ebf019033052dcd5e20c9517e58be071f1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 20 May 2022 08:10:29 -0400 Subject: [PATCH 327/615] unused argument removed --- src/Parsers/Access/ParserCreateUserQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 532a1bae47f..f5a1c6869d2 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -46,7 +46,7 @@ namespace } - bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, bool /*id_mode*/, AuthenticationData & auth_data) + bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, AuthenticationData & auth_data) { return IParserBase::wrapParseImpl(pos, [&] { @@ -447,7 +447,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!auth_data) { AuthenticationData new_auth_data; - if 
(parseAuthenticationData(pos, expected, attach_mode, new_auth_data)) + if (parseAuthenticationData(pos, expected, new_auth_data)) { auth_data = std::move(new_auth_data); continue; From 1fdcb9b727545e5ec61c66681bea1f3b77cae2c0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 20 May 2022 08:41:27 -0400 Subject: [PATCH 328/615] test added --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index f8de8ed90e6..5b6d49e2741 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -261,10 +261,18 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '[A-Za-z0-9]{64}' SALT '[A-Za-z0-9]{64}'" }, + { + "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", + "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'" + }, { "ALTER USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '[A-Za-z0-9]{64}' SALT '[A-Za-z0-9]{64}'" }, + { + "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", + "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'" + }, { "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123' SALT 'EFFD7F6B03B3EA68B8F86C1E91614DD50E42EB31EF7160524916444D58B5E264'", "throws Syntax error" From 3884819bd1501e9553cb7e6187b0b58d019728e2 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 12:48:58 +0000 Subject: [PATCH 329/615] Fix --- src/Interpreters/ExpressionActions.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 9c565225d29..fddbc40f8ae 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -160,8 +160,7 @@ static void setLazyExecutionInfo( const ActionsDAGReverseInfo::NodeInfo & node_info = reverse_info.nodes_info[reverse_info.reverse_index.at(node)]; /// If node is used in result or it doesn't have parents, we can't enable lazy execution. - if (node_info.used_in_result || node_info.parents.empty() || node->type != ActionsDAG::ActionType::FUNCTION - || node->type != ActionsDAG::ActionType::ALIAS) + if (node_info.used_in_result || node_info.parents.empty() || (node->type != ActionsDAG::ActionType::FUNCTION && node->type != ActionsDAG::ActionType::ALIAS)) { lazy_execution_info.can_be_lazy_executed = false; return; From 6b621440907b75fa820db16b920492941326e321 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 20 May 2022 08:57:18 -0400 Subject: [PATCH 330/615] docs spelling fix --- docs/en/sql-reference/statements/create/user.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 588344520af..34f0a13147c 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -35,7 +35,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` -For identifacation with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'. +For identification with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'. 
## User Host {#user-host} From 22b072bc7f85690d95953decc6ed0a0f6fca3974 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 13:04:43 +0000 Subject: [PATCH 331/615] Make better --- src/Interpreters/ExpressionActions.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index fddbc40f8ae..373c09ddd3c 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -297,6 +297,10 @@ static std::unordered_set processShortCircuitFunctions short_circuit_nodes[&node] = short_circuit_settings; } + /// If there is no short-circuit functions, no need to do anything. + if (short_circuit_nodes.empty()) + return {}; + auto reverse_info = getActionsDAGReverseInfo(nodes, actions_dag.getIndex()); /// For each node we fill LazyExecutionInfo. From 0f6715bd91f199c2b9a162e9c6fd04a8d481a0bb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 20 May 2022 10:14:03 +0200 Subject: [PATCH 332/615] Follow-up to PR #37300: semicolon warnings In PR #37300, Alexej asked why the compiler does not warn about unnecessary semicolons, e.g. f() { }; // <-- here The answer is surprising: In C++98, the above syntax was disallowed, but most compilers accepted it regardless. C++11 introduced "empty declarations" which made the syntax legal. The previous behavior can be restored using the flag -Wc++98-compat-extra-semi. This finds many useless semicolons which were removed in this change. Unfortunately, there are also false positives which would require #pragma-s and HAS_* logic (--> check_flags.cmake) to suppress. In the end, -Wc++98-compat-extra-semi comes with extra effort for little benefit. Therefore, this change only fixes some semicolons but does not enable the flag. 
--- src/Client/HedgedConnections.cpp | 2 +- src/Common/tests/gtest_lru_file_cache.cpp | 2 +- src/Common/tests/gtest_sensitive_data_masker.cpp | 2 +- src/Compression/tests/gtest_compressionCodec.cpp | 2 +- src/DataTypes/tests/gtest_data_type_get_common_type.cpp | 4 ++-- src/Disks/IDiskRemote.cpp | 2 +- src/Functions/SubtractSubSeconds.cpp | 6 +++--- src/Interpreters/SessionLog.cpp | 2 +- src/Storages/ExecutableSettings.cpp | 2 +- src/Storages/IndicesDescription.cpp | 2 +- .../MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp | 2 +- .../MeiliSearch/MeiliSearchColumnDescriptionFetcher.h | 2 +- src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 4 ++-- src/Storages/ProjectionsDescription.cpp | 2 +- 16 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 75f25263b6e..9f0ead79981 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -345,7 +345,7 @@ HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(As else throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR); } -}; +} bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLocation & location) { diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index 24e69259241..36137e02a84 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -32,7 +32,7 @@ void assertRange( ASSERT_EQ(range.left, expected_range.left); ASSERT_EQ(range.right, expected_range.right); ASSERT_EQ(file_segment->state(), expected_state); -}; +} void printRanges(const auto & segments) { diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index 7ebf141d961..b9ee9025c03 100644 --- 
a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -22,7 +22,7 @@ extern const int CANNOT_COMPILE_REGEXP; extern const int NO_ELEMENTS_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; } -}; +} TEST(Common, SensitiveDataMasker) diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 2d26cfcd5e1..77050908265 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -790,7 +790,7 @@ std::vector generatePyramidOfSequences(const size_t sequences } return sequences; -}; +} // helper macro to produce human-friendly sequence name from generator #define G(generator) generator, #generator diff --git a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index 2a77237e982..a85606618a3 100644 --- a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp +++ b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -22,7 +22,7 @@ static auto typeFromString(const std::string & str) { auto & data_type_factory = DataTypeFactory::instance(); return data_type_factory.get(str); -}; +} static auto typesFromString(const std::string & str) { @@ -33,7 +33,7 @@ static auto typesFromString(const std::string & str) data_types.push_back(typeFromString(data_type)); return data_types; -}; +} struct TypesTestCase { diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 326ee88eea6..00d36bcc624 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes extern const int INCORRECT_DISK_INDEX; extern const int UNKNOWN_FORMAT; extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED;; + extern const int PATH_ACCESS_DENIED; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; } diff --git a/src/Functions/SubtractSubSeconds.cpp 
b/src/Functions/SubtractSubSeconds.cpp index 5eeb24c8748..ac3a66bfc2d 100644 --- a/src/Functions/SubtractSubSeconds.cpp +++ b/src/Functions/SubtractSubSeconds.cpp @@ -9,19 +9,19 @@ using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval(); -}; +} using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionSubtractMicroseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionSubtractMilliseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 7772c8cdb3b..3523b2f1be0 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -61,7 +61,7 @@ void fillColumnArray(const Strings & data, IColumn & column) } auto & offsets = array.getOffsets(); offsets.push_back(offsets.back() + size); -}; +} } diff --git a/src/Storages/ExecutableSettings.cpp b/src/Storages/ExecutableSettings.cpp index 136357eb6f8..dc462350a06 100644 --- a/src/Storages/ExecutableSettings.cpp +++ b/src/Storages/ExecutableSettings.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS); +IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) void ExecutableSettings::loadFromQuery(ASTStorage & storage_def) { diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index a0a1bcbce2d..68cf6dfbb28 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes { extern const int INCORRECT_QUERY; extern const int LOGICAL_ERROR; -}; +} IndexDescription::IndexDescription(const IndexDescription & other) : definition_ast(other.definition_ast ? 
other.definition_ast->clone() : nullptr) diff --git a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp index e8da210edc8..0777f43aaae 100644 --- a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp +++ b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp @@ -84,4 +84,4 @@ ColumnsDescription MeiliSearchColumnDescriptionFetcher::fetchColumnsDescription( return ColumnsDescription(list); } -}; +} diff --git a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h index 29d5f865d8c..19b40251d9d 100644 --- a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h +++ b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h @@ -21,4 +21,4 @@ private: MeiliSearchConnection connection; }; -}; +} diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 5ecb7b537e2..b244bd489f1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -660,7 +660,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); -}; +} bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const { diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 30fb7e55a10..3c31deda823 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -484,7 +484,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { return std::make_shared(index.name, index.sample_block, max_rows, query, context); -}; +} bool 
MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const { diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index de99193e4d3..b3ff05a960a 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -123,14 +123,14 @@ static const ASTFunction * getAsTuple(const ASTPtr & node) if (const auto * func = node->as(); func && func->name == "tuple") return func; return {}; -}; +} static bool getAsTupleLiteral(const ASTPtr & node, Tuple & tuple) { if (const auto * value_tuple = node->as()) return value_tuple && value_tuple->value.tryGet(tuple); return false; -}; +} bool MergeTreeWhereOptimizer::tryAnalyzeTuple(Conditions & res, const ASTFunction * func, bool is_final) const { diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index a1bc0bd58da..5e9966a2794 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes extern const int ILLEGAL_PROJECTION; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; -}; +} bool ProjectionDescription::isPrimaryKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const { From dc4bc2908eb2c5fe67928645ed5bf3fef80588e2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 15:14:01 +0200 Subject: [PATCH 333/615] Fix azure --- src/Disks/AzureObjectStorage.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/AzureObjectStorage.cpp index 7118d3e0c01..75a602760a7 100644 --- a/src/Disks/AzureObjectStorage.cpp +++ b/src/Disks/AzureObjectStorage.cpp @@ -196,10 +196,9 @@ void AzureObjectStorage::copyObject( /// NOLINT void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto new_settings = 
getAzureBlobStorageSettings(config, config_prefix, context); - auto new_client = getAzureBlobContainerClient(config, config_prefix); - - client.set(std::move(new_client)); settings.set(std::move(new_settings)); + + /// We don't update client } From 78069765bb6791443be33950ca68e6a6a34e6deb Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 13:20:52 +0000 Subject: [PATCH 334/615] Try to remove code duplication --- src/Storages/StorageSnapshot.cpp | 31 +++---------------------------- src/Storages/StorageURL.cpp | 3 +-- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8dd2a52b647..a4e2a8adede 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -92,34 +92,9 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; - - const auto & columns = getMetadataForQuery()->getColumns(); - for (const auto & name : column_names) - { - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - - if (column && !object_column) - { - res.insert({column->type->createColumn(), column->type, column->name}); - } - else if (object_column) - { - res.insert({object_column->type->createColumn(), object_column->type, object_column->name}); - } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) - { - /// Virtual columns must be appended after ordinary, because user can - /// override them. 
- const auto & type = it->second; - res.insert({type->createColumn(), type, name}); - } - else - { - throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); - } - } + auto columns_description = getDescriptionForColumns(column_names); + for (const auto & column : columns_description) + res.insert({column.type->createColumn(), column.type, column.name}); return res; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e9814e519b7..0db4fa75aba 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -689,8 +689,7 @@ Pipe StorageURLWithFailover::read( Block block_for_format; if (isColumnOriented()) { - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(column_names); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else From 6b7dd76fac2358137fecbf1a468b30898b23524d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 20 May 2022 07:14:52 +0300 Subject: [PATCH 335/615] Fix debug symbols in packages - before: usr/lib/debug/usr/bin/clickhouse.debug/clickhouse.debug - after : usr/lib/debug/usr/bin/clickhouse.debug Note, clickhouse_make_empty_debug_info_for_nfpm() is fine. 
Signed-off-by: Azat Khuzhin --- cmake/strip_binary.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index 2d6a3888503..be23a4c1c30 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -27,7 +27,7 @@ macro(clickhouse_strip_binary) ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR} COMPONENT clickhouse) endmacro() From a4cf07708caf09942e525d7dbc98c8c6820e1934 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 14:57:27 +0000 Subject: [PATCH 336/615] Fix comments --- src/Disks/SingleDiskVolume.cpp | 6 ++++++ src/Formats/FormatFactory.cpp | 14 +++++++------- src/Formats/FormatFactory.h | 10 +++++----- src/IO/ReadHelpers.cpp | 8 +++++++- src/Storages/Hive/StorageHive.cpp | 4 ++-- src/Storages/StorageFile.h | 2 +- 6 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 src/Disks/SingleDiskVolume.cpp diff --git a/src/Disks/SingleDiskVolume.cpp b/src/Disks/SingleDiskVolume.cpp new file mode 100644 index 00000000000..47140407026 --- /dev/null +++ b/src/Disks/SingleDiskVolume.cpp @@ -0,0 +1,6 @@ +#include + +namespace DB +{ + +} diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 961bed1f141..ca4dbe7f017 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -540,17 +540,17 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) { - auto & target = dict[name].supports_subset_of_columns_columns; + auto 
& target = dict[name].supports_subset_of_columns; if (target) - throw Exception("FormatFactory: Format " + name + " is already marked as column oriented", ErrorCodes::LOGICAL_ERROR); + throw Exception("FormatFactory: Format " + name + " is already marked as supporting subset of columns", ErrorCodes::LOGICAL_ERROR); target = true; } -bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) +bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) const { const auto & target = getCreators(name); - return target.supports_subset_of_columns_columns; + return target.supports_subset_of_columns; } bool FormatFactory::isInputFormat(const String & name) const @@ -565,19 +565,19 @@ bool FormatFactory::isOutputFormat(const String & name) const return it != dict.end() && it->second.output_creator; } -bool FormatFactory::checkIfFormatHasSchemaReader(const String & name) +bool FormatFactory::checkIfFormatHasSchemaReader(const String & name) const { const auto & target = getCreators(name); return bool(target.schema_reader_creator); } -bool FormatFactory::checkIfFormatHasExternalSchemaReader(const String & name) +bool FormatFactory::checkIfFormatHasExternalSchemaReader(const String & name) const { const auto & target = getCreators(name); return bool(target.external_schema_reader_creator); } -bool FormatFactory::checkIfFormatHasAnySchemaReader(const String & name) +bool FormatFactory::checkIfFormatHasAnySchemaReader(const String & name) const { return checkIfFormatHasSchemaReader(name) || checkIfFormatHasExternalSchemaReader(name); } diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 006d5d05099..8e949a3e367 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -108,7 +108,7 @@ private: SchemaReaderCreator schema_reader_creator; ExternalSchemaReaderCreator external_schema_reader_creator; bool supports_parallel_formatting{false}; - bool supports_subset_of_columns_columns{false}; + bool 
supports_subset_of_columns{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; AppendSupportChecker append_support_checker; }; @@ -196,11 +196,11 @@ public: void markOutputFormatSupportsParallelFormatting(const String & name); void markFormatSupportsSubsetOfColumns(const String & name); - bool checkIfFormatSupportsSubsetOfColumns(const String & name); + bool checkIfFormatSupportsSubsetOfColumns(const String & name) const; - bool checkIfFormatHasSchemaReader(const String & name); - bool checkIfFormatHasExternalSchemaReader(const String & name); - bool checkIfFormatHasAnySchemaReader(const String & name); + bool checkIfFormatHasSchemaReader(const String & name) const; + bool checkIfFormatHasExternalSchemaReader(const String & name) const; + bool checkIfFormatHasAnySchemaReader(const String & name) const; const FormatsDictionary & getAllFormats() const { diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 68a85e86741..1bf5f247b00 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -618,6 +618,12 @@ void readBackQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) readBackQuotedStringInto(s, buf); } +template +concept WithResize = requires (T value) +{ + { value.resize(1) }; + { value.size() } -> std::integral<>; +}; template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings) @@ -701,7 +707,7 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & if (!buf.hasPendingData()) continue; - if constexpr (!std::is_same_v) + if constexpr (WithResize) { /** CSV format can contain insignificant spaces and tabs. * Usually the task of skipping them is for the calling code. 
diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index b5251bbe098..38c8c054a9b 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -668,7 +668,7 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( return hive_file; } -bool StorageHive::supportsSamplingColumns() const +bool StorageHive::supportsSubsetOfColumns() const { return format_name == "Parquet" || format_name == "ORC"; } @@ -822,7 +822,7 @@ std::optional StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const { /// Row-based format like Text doesn't support totalRowsByPartitionPredicate - if (!supportsSamplingColumns()) + if (!supportsSubsetOfColumns()) return {}; auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 66b71476653..f47f6172c1c 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -69,7 +69,7 @@ public: static Strings getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read); - /// Check if the format supports reading only some sampling of columns. + /// Check if the format supports reading only some subset of columns. /// Is is useful because such formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
From cb0e6c2718dcc01f21251d013b943ff44851c9ad Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 20 May 2022 15:29:54 +0000 Subject: [PATCH 337/615] mark all operators bool() as explicit --- src/Common/COW.h | 2 +- src/Common/HashTable/StringHashTable.h | 2 +- src/Core/BackgroundSchedulePool.h | 2 +- src/Core/Block.h | 2 +- src/Processors/Chunk.h | 2 +- src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp | 2 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndices.h | 2 +- src/Storages/System/StorageSystemPartsBase.h | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Common/COW.h b/src/Common/COW.h index f958fe71824..f772acd84e0 100644 --- a/src/Common/COW.h +++ b/src/Common/COW.h @@ -219,7 +219,7 @@ protected: /// Get internal immutable ptr. Does not change internal use counter. immutable_ptr detach() && { return std::move(value); } - operator bool() const { return value != nullptr; } /// NOLINT + explicit operator bool() const { return value != nullptr; } bool operator! 
() const { return value == nullptr; } bool operator== (const chameleon_ptr & rhs) const { return value == rhs.value; } diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index 7e259d66cd0..6a8bdc06218 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -169,7 +169,7 @@ struct StringHashTableLookupResult auto & operator*() const { return *this; } auto * operator->() { return this; } auto * operator->() const { return this; } - operator bool() const { return mapped_ptr; } /// NOLINT + explicit operator bool() const { return mapped_ptr; } friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; } friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; } friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; } diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h index 35a471a367f..fbd7e3f749a 100644 --- a/src/Core/BackgroundSchedulePool.h +++ b/src/Core/BackgroundSchedulePool.h @@ -161,7 +161,7 @@ public: task_info->deactivate(); } - operator bool() const { return task_info != nullptr; } /// NOLINT + explicit operator bool() const { return task_info != nullptr; } BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); } const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); } diff --git a/src/Core/Block.h b/src/Core/Block.h index 8089dffd1dc..85bbc5005df 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -108,7 +108,7 @@ public: /// Approximate number of allocated bytes in memory - for profiling and limits. 
size_t allocatedBytes() const; - operator bool() const { return !!columns(); } /// NOLINT + explicit operator bool() const { return !!columns(); } bool operator!() const { return !this->operator bool(); } /// NOLINT /** Get a list of column names separated by commas. */ diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1c9240ba114..da5fe0c23ed 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -90,7 +90,7 @@ public: bool hasRows() const { return num_rows > 0; } bool hasColumns() const { return !columns.empty(); } bool empty() const { return !hasRows() && !hasColumns(); } - operator bool() const { return !empty(); } /// NOLINT + explicit operator bool() const { return !empty(); } void addColumn(ColumnPtr column); void addColumn(size_t position, ColumnPtr column); diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 0efb1d71004..d3e167d35c6 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -88,7 +88,7 @@ void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr threa // We don't know how many blocks will be. So we have to read them all // until an empty block occurred. Chunk chunk; - while (!parsing_finished && (chunk = parser.getChunk()) != Chunk()) + while (!parsing_finished && (chunk = parser.getChunk())) { /// Variable chunk is moved, but it is not really used in the next iteration. 
/// NOLINTNEXTLINE(bugprone-use-after-move, hicpp-invalid-access-moved) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 687b32d0ef0..f056842926d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1480,7 +1480,7 @@ bool TCPHandler::receiveUnexpectedData(bool throw_exception) maybe_compressed_in = in; auto skip_block_in = std::make_shared(*maybe_compressed_in, client_tcp_protocol_version); - bool read_ok = skip_block_in->read(); + bool read_ok = !!skip_block_in->read(); if (!read_ok) state.read_all_data = true; diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 984a2bb7762..d76216e1598 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -29,7 +29,7 @@ struct MergeTreeIndexFormat MergeTreeIndexVersion version; const char* extension; - operator bool() const { return version != 0; } /// NOLINT + explicit operator bool() const { return version != 0; } }; /// Stores some info about a single block of data. 
diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 477261ad7ad..36c0fd551df 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -22,7 +22,7 @@ struct StoragesInfo bool need_inactive_parts = false; MergeTreeData * data = nullptr; - operator bool() const { return storage != nullptr; } /// NOLINT + explicit operator bool() const { return storage != nullptr; } MergeTreeData::DataPartsVector getParts(MergeTreeData::DataPartStateVector & state, bool has_state_column, bool require_projection_parts = false) const; }; From 10277985177ea9066109d3c04c18d54847e138b3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 20 May 2022 17:35:29 +0200 Subject: [PATCH 338/615] handle connection loss on commit --- src/Common/ErrorCodes.cpp | 1 + .../InterpreterTransactionControlQuery.cpp | 15 ++- src/Interpreters/MergeTreeTransaction.cpp | 4 +- src/Interpreters/MergeTreeTransaction.h | 1 + src/Interpreters/TransactionLog.cpp | 102 ++++++++++++++---- src/Interpreters/TransactionLog.h | 7 ++ src/Interpreters/executeQuery.cpp | 5 +- .../System/StorageSystemTransactions.cpp | 1 + 8 files changed, 114 insertions(+), 22 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 5f78c79f606..5d5ce052aaa 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -627,6 +627,7 @@ M(656, MEILISEARCH_EXCEPTION) \ M(657, UNSUPPORTED_MEILISEARCH_TYPE) \ M(658, MEILISEARCH_MISSING_SOME_COLUMNS) \ + M(659, UNKNOWN_STATUS_OF_TRANSACTION) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index 61b2a4e865f..148c6e93919 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -10,6 +10,7 @@ namespace ErrorCodes { extern const int 
LOGICAL_ERROR; extern const int INVALID_TRANSACTION; + extern const int UNKNOWN_STATUS_OF_TRANSACTION; } BlockIO InterpreterTransactionControlQuery::execute() @@ -55,7 +56,17 @@ BlockIO InterpreterTransactionControlQuery::executeCommit(ContextMutablePtr sess if (txn->getState() != MergeTreeTransaction::RUNNING) throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction is not in RUNNING state"); - TransactionLog::instance().commitTransaction(txn); + try + { + TransactionLog::instance().commitTransaction(txn); + } + catch (const Exception & e) + { + /// Detach transaction from current context if connection was lost and its status is unknown + if (e.code() == ErrorCodes::UNKNOWN_STATUS_OF_TRANSACTION) + session_context->setCurrentTransaction(NO_TRANSACTION_PTR); + throw; + } session_context->setCurrentTransaction(NO_TRANSACTION_PTR); return {}; } @@ -67,6 +78,8 @@ BlockIO InterpreterTransactionControlQuery::executeRollback(ContextMutablePtr se throw Exception(ErrorCodes::INVALID_TRANSACTION, "There is no current transaction"); if (txn->getState() == MergeTreeTransaction::COMMITTED) throw Exception(ErrorCodes::LOGICAL_ERROR, "Transaction is in COMMITTED state"); + if (txn->getState() == MergeTreeTransaction::COMMITTING) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transaction is in COMMITTING state"); if (txn->getState() == MergeTreeTransaction::RUNNING) TransactionLog::instance().rollbackTransaction(txn); diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index d3f523aafc9..0607279ac68 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ b/src/Interpreters/MergeTreeTransaction.cpp @@ -38,8 +38,10 @@ void MergeTreeTransaction::setSnapshot(CSN new_snapshot) MergeTreeTransaction::State MergeTreeTransaction::getState() const { CSN c = csn.load(); - if (c == Tx::UnknownCSN || c == Tx::CommittingCSN) + if (c == Tx::UnknownCSN) return RUNNING; + if (c == Tx::CommittingCSN) + return COMMITTING; if (c == 
Tx::RolledBackCSN) return ROLLED_BACK; return COMMITTED; diff --git a/src/Interpreters/MergeTreeTransaction.h b/src/Interpreters/MergeTreeTransaction.h index 7ebea450dd0..6c34e8a8388 100644 --- a/src/Interpreters/MergeTreeTransaction.h +++ b/src/Interpreters/MergeTreeTransaction.h @@ -26,6 +26,7 @@ public: enum State { RUNNING, + COMMITTING, COMMITTED, ROLLED_BACK, }; diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 3fe0920427e..c1ecc9e0f21 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -21,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNKNOWN_STATUS_OF_TRANSACTION; } static void tryWriteEventToSystemLog(Poco::Logger * log, ContextPtr context, @@ -217,7 +218,8 @@ void TransactionLog::runUpdatingThread() if (stop_flag.load()) return; - if (getZooKeeper()->expired()) + bool connection_loss = getZooKeeper()->expired(); + if (connection_loss) { auto new_zookeeper = global_context->getZooKeeper(); std::lock_guard lock{mutex}; @@ -226,6 +228,9 @@ void TransactionLog::runUpdatingThread() loadNewEntries(); removeOldEntries(); + + if (connection_loss) + tryFinalizeUnknownStateTransactions(); } catch (const Coordination::Exception &) { @@ -314,6 +319,32 @@ void TransactionLog::removeOldEntries() tid_to_csn.erase(tid_hash); } +void TransactionLog::tryFinalizeUnknownStateTransactions() +{ + /// We just recovered connection to [Zoo]Keeper. + /// Check if transactions in unknown state were actually committed or not and finalize or rollback them. + UnknownStateList list; + { + std::lock_guard lock{running_list_mutex}; + std::swap(list, unknown_state_list); + } + + for (auto & [txn, state_guard] : list) + { + /// CSNs must be already loaded, only need to check if the corresponding mapping exists. 
+ if (auto csn = getCSN(txn->tid)) + { + finalizeCommittedTransaction(txn, csn); + } + else + { + assertTIDIsNotOutdated(txn->tid); + state_guard = {}; + rollbackTransaction(txn->shared_from_this()); + } + } +} + CSN TransactionLog::getLatestSnapshot() const { return latest_snapshot.load(); @@ -342,55 +373,90 @@ MergeTreeTransactionPtr TransactionLog::beginTransaction() CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn) { /// Some precommit checks, may throw - auto committing_lock = txn->beforeCommit(); + auto state_guard = txn->beforeCommit(); - CSN new_csn; + CSN allocated_csn = Tx::UnknownCSN; if (txn->isReadOnly()) { /// Don't need to allocate CSN in ZK for readonly transactions, it's safe to use snapshot/start_csn as "commit" timestamp LOG_TEST(log, "Closing readonly transaction {}", txn->tid); - new_csn = txn->snapshot; - tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, txn->tid, new_csn); } else { LOG_TEST(log, "Committing transaction {}", txn->dumpDescription()); - /// TODO handle connection loss /// TODO support batching auto current_zookeeper = getZooKeeper(); - String path_created = current_zookeeper->create(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential); /// Commit point - NOEXCEPT_SCOPE; + String csn_path_created; + try + { + /// Commit point + csn_path_created = current_zookeeper->create(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential); + } + catch (const Coordination::Exception & e) + { + if (!Coordination::isHardwareError(e.code)) + throw; + /// We don't know if transaction has been actually committed or not. + /// The only thing we can do is to postpone its finalization. 
+ { + std::lock_guard lock{running_list_mutex}; + unknown_state_list.emplace_back(txn.get(), std::move(state_guard)); + } + log_updated_event->set(); + throw Exception(ErrorCodes::UNKNOWN_STATUS_OF_TRANSACTION, "Connection lost on attempt to commit transaction {}, will finalize it later: {}", txn->tid, e.message()); + } + + NOEXCEPT_SCOPE; /// FIXME Transactions: Sequential node numbers in ZooKeeper are Int32, but 31 bit is not enough for production use /// (overflow is possible in a several weeks/months of active usage) - new_csn = deserializeCSN(path_created.substr(zookeeper_path_log.size() + 1)); + allocated_csn = deserializeCSN(csn_path_created.substr(zookeeper_path_log.size() + 1)); + } - LOG_INFO(log, "Transaction {} committed with CSN={}", txn->tid, new_csn); - tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, txn->tid, new_csn); + return finalizeCommittedTransaction(txn.get(), allocated_csn); +} + +CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN allocated_csn) noexcept +{ + chassert(!allocated_csn == txn->isReadOnly()); + if (allocated_csn) + { + LOG_INFO(log, "Transaction {} committed with CSN={}", txn->tid, allocated_csn); + tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, txn->tid, allocated_csn); /// Wait for committed changes to become actually visible, so the next transaction in this session will see the changes /// TODO it's optional, add a setting for this auto current_latest_snapshot = latest_snapshot.load(); - while (current_latest_snapshot < new_csn && !stop_flag) + while (current_latest_snapshot < allocated_csn && !stop_flag) { latest_snapshot.wait(current_latest_snapshot); current_latest_snapshot = latest_snapshot.load(); } } + else + { + /// Transaction was readonly + allocated_csn = txn->snapshot; + tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, txn->tid, allocated_csn); + } /// Write allocated CSN, so we will 
be able to cleanup log in ZK. This method is noexcept. - txn->afterCommit(new_csn); + txn->afterCommit(allocated_csn); { /// Finally we can remove transaction from the list and release the snapshot + MergeTreeTransactionPtr txn_ptr; std::lock_guard lock{running_list_mutex}; + snapshots_in_use.erase(txn->snapshot_in_use_it); bool removed = running_list.erase(txn->tid.getHash()); if (!removed) - throw Exception(ErrorCodes::LOGICAL_ERROR, "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); - snapshots_in_use.erase(txn->snapshot_in_use_it); + { + LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); + abort(); + } } - return new_csn; + return allocated_csn; } void TransactionLog::rollbackTransaction(const MergeTreeTransactionPtr & txn) noexcept @@ -400,8 +466,8 @@ void TransactionLog::rollbackTransaction(const MergeTreeTransactionPtr & txn) no if (!txn->rollback()) { - /// Transaction was cancelled concurrently, it's already rolled back. - chassert(txn->csn == Tx::RolledBackCSN); + /// Transaction was cancelled or committed concurrently + chassert(txn->csn != Tx::UnknownCSN); return; } diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h index 86584a74c68..69a9c9c7b75 100644 --- a/src/Interpreters/TransactionLog.h +++ b/src/Interpreters/TransactionLog.h @@ -127,6 +127,10 @@ private: void loadNewEntries(); void removeOldEntries(); + CSN finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN allocated_csn) noexcept; + + void tryFinalizeUnknownStateTransactions(); + static UInt64 deserializeCSN(const String & csn_node_name); static String serializeCSN(CSN csn); static TransactionID deserializeTID(const String & csn_node_content); @@ -159,6 +163,9 @@ private: mutable std::mutex running_list_mutex; /// Transactions that are currently processed TransactionsList running_list; + /// If we lost connection on attempt to create csn- node then we don't know transaction's state. 
+ using UnknownStateList = std::vector>; + UnknownStateList unknown_state_list; /// Ordered list of snapshots that are currently used by some transactions. Needed for background cleanup. std::list snapshots_in_use; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 3c03bea3dd1..186c8c30cfa 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -444,9 +444,10 @@ static std::tuple executeQueryImpl( if (auto txn = context->getCurrentTransaction()) { - assert(txn->getState() != MergeTreeTransaction::COMMITTED); + chassert(txn->getState() != MergeTreeTransaction::COMMITTING); + chassert(txn->getState() != MergeTreeTransaction::COMMITTED); if (txn->getState() == MergeTreeTransaction::ROLLED_BACK && !ast->as() && !ast->as()) - throw Exception(ErrorCodes::INVALID_TRANSACTION, "Cannot execute query: transaction is rolled back"); + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Cannot execute query because current transaction failed. 
Expecting ROLLBACK statement."); } /// Interpret SETTINGS clauses as early as possible (before invoking the corresponding interpreter), diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index 396fc875f74..21fa72ea12a 100644 --- a/src/Storages/System/StorageSystemTransactions.cpp +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -15,6 +15,7 @@ static DataTypePtr getStateEnumType() DataTypeEnum8::Values { {"RUNNING", static_cast(MergeTreeTransaction::State::RUNNING)}, + {"COMMITTING", static_cast(MergeTreeTransaction::State::COMMITTING)}, {"COMMITTED", static_cast(MergeTreeTransaction::State::COMMITTED)}, {"ROLLED_BACK", static_cast(MergeTreeTransaction::State::ROLLED_BACK)}, }); From 8c42a7c8f721e3a3e072013a591f1ffb2c5710ba Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 20 May 2022 17:38:35 +0200 Subject: [PATCH 339/615] Update 02304_grouping_sets_with_rollup_cube.sql --- .../0_stateless/02304_grouping_sets_with_rollup_cube.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql index 524ef363b6b..8a53ccb5121 100644 --- a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql +++ b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check:22.4 +-- Tags: no-backward-compatibility-check:22.5.1 SELECT number From d414d85654863b92de376339aa30d84d8efdff3d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 17:43:48 +0200 Subject: [PATCH 340/615] Fix hdfs bug --- src/Disks/DiskObjectStorage.cpp | 8 ++++++- src/Disks/HDFSObjectStorage.cpp | 21 ++----------------- .../test.py | 8 +++---- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 012a6d5b4c9..73ee5448b2a 100644 --- 
a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -500,6 +500,9 @@ String DiskObjectStorage::getUniqueId(const String & path) const bool DiskObjectStorage::checkObjectExists(const String & path) const { + if (!path.starts_with(remote_fs_root_path)) + return false; + return object_storage->exists(path); } @@ -714,7 +717,9 @@ void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all for (auto && [local_path, remote_paths] : paths_to_remove) { if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) + { remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); + } } removeFromRemoteFS(remove_from_remote); } @@ -763,6 +768,7 @@ std::unique_ptr DiskObjectStorage::writeFile( const WriteSettings & settings) { auto blob_name = getRandomASCIIString(); + auto blob_path = fs::path(remote_fs_root_path) / blob_name; std::optional object_attributes; if (send_metadata) @@ -781,7 +787,7 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, WriteMode::Rewrite, object_attributes, create_metadata_callback, buf_size, settings); + return object_storage->writeObject(blob_path, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); } diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/HDFSObjectStorage.cpp index 9e99e7aa820..5a1a70f6a50 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/HDFSObjectStorage.cpp @@ -106,14 +106,7 @@ void HDFSObjectStorage::removeObject(const std::string & path) void HDFSObjectStorage::removeObjects(const std::vector & paths) { for (const auto & hdfs_path : paths) - { - const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); - - /// Add path from root to file name - int res = 
hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); - if (res == -1) - throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); - } + removeObject(hdfs_path); } void HDFSObjectStorage::removeObjectIfExists(const std::string & path) @@ -125,17 +118,7 @@ void HDFSObjectStorage::removeObjectIfExists(const std::string & path) void HDFSObjectStorage::removeObjectsIfExist(const std::vector & paths) { for (const auto & hdfs_path : paths) - { - if (!exists(hdfs_path)) - continue; - - const size_t begin_of_path = hdfs_path.find('/', hdfs_path.find("//") + 2); - - /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), hdfs_path.substr(begin_of_path).c_str(), 0); - if (res == -1) - throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: " + hdfs_path); - } + removeObjectIfExists(hdfs_path); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 1e34a924e39..23f465eaabd 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -77,7 +77,7 @@ def test_hdfs_zero_copy_replication_insert(cluster): ) node1.query("INSERT INTO hdfs_test VALUES (now() - INTERVAL 3 DAY, 10)") - node2.query("SYSTEM SYNC REPLICA hdfs_test") + node2.query("SYSTEM SYNC REPLICA hdfs_test", timeout=30) assert node1.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" assert node2.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" assert ( @@ -192,7 +192,7 @@ def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): node1.query( "INSERT INTO move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" ) - node2.query("SYSTEM SYNC REPLICA move_test") + node2.query("SYSTEM SYNC REPLICA 
move_test", timeout=30) assert ( node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") @@ -262,7 +262,7 @@ def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 1 DAY, 11)") node1.query("OPTIMIZE TABLE ttl_move_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_move_test") + node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" @@ -297,7 +297,7 @@ def test_hdfs_zero_copy_with_ttl_delete(cluster): node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 1 DAY, 11)") node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_delete_test") + node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" assert node2.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" From 3bad21cea7b14273e5144302923e80d736146215 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 20 May 2022 17:48:24 +0000 Subject: [PATCH 341/615] Try fix tests --- src/Storages/ColumnsDescription.cpp | 5 +++-- src/Storages/ColumnsDescription.h | 2 +- src/Storages/StorageSnapshot.cpp | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index e11c2477572..7a43ae7af4b 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -196,7 +196,7 @@ static auto getNameRange(const ColumnsDescription::ColumnsContainer & columns, c return std::make_pair(begin, end); } -void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first) +void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first, bool add_subcolumns) { if (has(column.name)) throw 
Exception("Cannot add column " + column.name + ": column with this name already exists", @@ -222,7 +222,8 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu insert_it = range.second; } - addSubcolumns(column.name, column.type); + if (add_subcolumns) + addSubcolumns(column.name, column.type); columns.get<0>().insert(insert_it, std::move(column)); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index a3921d254b2..c81ccb5d217 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -100,7 +100,7 @@ public: explicit ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases); /// `after_column` can be a Nested column name; - void add(ColumnDescription column, const String & after_column = String(), bool first = false); + void add(ColumnDescription column, const String & after_column = String(), bool first = false, bool add_subcolumns = true); /// `column_name` can be a Nested column name; void remove(const String & column_name); diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index a4e2a8adede..07c4c794210 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -106,14 +106,14 @@ ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & colum for (const auto & name : column_names) { auto column = columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + auto object_column = object_columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name); if (column && !object_column) { - res.add(*column); + res.add(*column, "", false, false); } else if (object_column) { - res.add({object_column->name, object_column->type}); + res.add(*object_column, "", false, false); } else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) { From 
704c78063f66091847d4c38eee01cf82fa113217 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 20 May 2022 19:54:02 +0200 Subject: [PATCH 342/615] Fix special build --- src/Processors/Formats/ISchemaReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 25b2675bb1c..00987540d04 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -129,6 +129,6 @@ void chooseResultColumnType( size_t row); void checkResultColumnTypeAndAppend( - NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t max_rows_to_read); + NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read); } From 5b08edefd15183b0c5de458e3a3e891c0372ce9e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 20 May 2022 20:07:15 +0200 Subject: [PATCH 343/615] Fix I don't understand --- src/Disks/DiskObjectStorage.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 73ee5448b2a..750a009ecf9 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -768,7 +768,6 @@ std::unique_ptr DiskObjectStorage::writeFile( const WriteSettings & settings) { auto blob_name = getRandomASCIIString(); - auto blob_path = fs::path(remote_fs_root_path) / blob_name; std::optional object_attributes; if (send_metadata) @@ -787,7 +786,7 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(blob_path, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); + return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, mode, 
object_attributes, std::move(create_metadata_callback), buf_size, settings); } From 91bd0b6f69eef2f3a273e27e00bd79c63952f33d Mon Sep 17 00:00:00 2001 From: Matthew Peveler Date: Fri, 20 May 2022 15:52:19 -0400 Subject: [PATCH 344/615] Fix docs for regionToTopContinent parameters --- docs/en/sql-reference/functions/ym-dict-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 85215957443..142d91b73be 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -105,7 +105,7 @@ Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia Converts a region to a continent. In every other way, this function is the same as ‘regionToCity’. Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001). -### regionToTopContinent (#regiontotopcontinent) {#regiontotopcontinent-regiontotopcontinent} +### regionToTopContinent (id,\[, geobase\]) {#regiontotopcontinentid-geobase} Finds the highest continent in the hierarchy for the region. From 4c005e53b61a9088bac13b6af0bf065e56d740ce Mon Sep 17 00:00:00 2001 From: Matthew Peveler Date: Fri, 20 May 2022 15:54:23 -0400 Subject: [PATCH 345/615] Update ym-dict-functions.md --- docs/en/sql-reference/functions/ym-dict-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 142d91b73be..20f43200a16 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -105,7 +105,7 @@ Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia Converts a region to a continent. In every other way, this function is the same as ‘regionToCity’. 
Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001). -### regionToTopContinent (id,\[, geobase\]) {#regiontotopcontinentid-geobase} +### regionToTopContinent (id\[, geobase\]) {#regiontotopcontinentid-geobase} Finds the highest continent in the hierarchy for the region. From 44f2d4529a1ef19b5f692ab3d72d397eae89dda7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 20 May 2022 22:08:46 +0200 Subject: [PATCH 346/615] better waiting, add fault injection --- src/Core/Settings.h | 1 + src/Core/SettingsEnums.cpp | 5 ++ src/Core/SettingsEnums.h | 9 +++ .../InterpreterTransactionControlQuery.cpp | 27 +++++++- .../InterpreterTransactionControlQuery.h | 2 +- src/Interpreters/MergeTreeTransaction.cpp | 11 +++ src/Interpreters/MergeTreeTransaction.h | 2 + .../MergeTreeTransactionHolder.cpp | 2 +- src/Interpreters/TransactionLog.cpp | 67 +++++++++++++++---- src/Interpreters/TransactionLog.h | 13 +++- .../TransactionVersionMetadata.cpp | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 7 +- src/Storages/System/StorageSystemParts.cpp | 3 + tests/config/config.d/transactions.xml | 8 +++ .../0_stateless/01133_begin_commit_race.sh | 12 ++-- .../01172_transaction_counters.sql | 8 ++- 16 files changed, 154 insertions(+), 27 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 29427c673ac..6a166f53bf9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -590,6 +590,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \ M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ + M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for 
committed changes to become actually visible in the latest snapshot", 0) \ M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \ M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \ // End of COMMON_SETTINGS diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index a37c1e9be86..bff1971bad9 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -131,6 +131,11 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) +IMPLEMENT_SETTING_ENUM(TransactionsWaitCSNMode, ErrorCodes::BAD_ARGUMENTS, + {{"async", TransactionsWaitCSNMode::ASYNC}, + {"wait", TransactionsWaitCSNMode::WAIT}, + {"wait_unknown", TransactionsWaitCSNMode::WAIT_UNKNOWN}}) + IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS, {{"by_names", FormatSettings::EnumComparingMode::BY_NAMES}, {"by_values", FormatSettings::EnumComparingMode::BY_VALUES}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 08091da6d6c..83a65f2a320 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -183,6 +183,15 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) +enum class TransactionsWaitCSNMode +{ + ASYNC, + WAIT, + WAIT_UNKNOWN, +}; + +DECLARE_SETTING_ENUM(TransactionsWaitCSNMode) + DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode) DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index 148c6e93919..bdcc351c32b 100644 --- 
a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -56,17 +56,40 @@ BlockIO InterpreterTransactionControlQuery::executeCommit(ContextMutablePtr sess if (txn->getState() != MergeTreeTransaction::RUNNING) throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction is not in RUNNING state"); + TransactionsWaitCSNMode mode = query_context->getSettingsRef().wait_changes_become_visible_after_commit_mode; + CSN csn; try { - TransactionLog::instance().commitTransaction(txn); + csn = TransactionLog::instance().commitTransaction(txn, /* throw_on_unknown_status */ mode != TransactionsWaitCSNMode::WAIT_UNKNOWN); } catch (const Exception & e) { - /// Detach transaction from current context if connection was lost and its status is unknown if (e.code() == ErrorCodes::UNKNOWN_STATUS_OF_TRANSACTION) + { + /// Detach transaction from current context if connection was lost and its status is unknown session_context->setCurrentTransaction(NO_TRANSACTION_PTR); + } throw; } + + if (csn == Tx::CommittingCSN) + { + chassert(mode == TransactionsWaitCSNMode::WAIT_UNKNOWN); + + /// Try to wait for connection to be restored and its status to be loaded. + /// It's useful for testing. It allows to enable fault injection (after commit) without breaking tests. 
+ txn->waitStateChange(Tx::CommittingCSN); + + if (txn->getState() == MergeTreeTransaction::ROLLED_BACK) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction {} was rolled back", txn->tid); + if (txn->getState() != MergeTreeTransaction::COMMITTED) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transaction {} has invalid state {}", txn->tid, txn->getState()); + } + + /// Wait for committed changes to become actually visible, so the next transaction in this session will see the changes + if (mode != TransactionsWaitCSNMode::ASYNC) + TransactionLog::instance().waitForCSNLoaded(csn); + session_context->setCurrentTransaction(NO_TRANSACTION_PTR); return {}; } diff --git a/src/Interpreters/InterpreterTransactionControlQuery.h b/src/Interpreters/InterpreterTransactionControlQuery.h index 05d3068e095..bf2dc7891a7 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.h +++ b/src/Interpreters/InterpreterTransactionControlQuery.h @@ -22,7 +22,7 @@ public: private: BlockIO executeBegin(ContextMutablePtr session_context); - static BlockIO executeCommit(ContextMutablePtr session_context); + BlockIO executeCommit(ContextMutablePtr session_context); static BlockIO executeRollback(ContextMutablePtr session_context); static BlockIO executeSetSnapshot(ContextMutablePtr session_context, UInt64 snapshot); diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index 0607279ac68..11287f5de97 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ b/src/Interpreters/MergeTreeTransaction.cpp @@ -47,6 +47,17 @@ MergeTreeTransaction::State MergeTreeTransaction::getState() const return COMMITTED; } +bool MergeTreeTransaction::waitStateChange(CSN expected_state_csn) const +{ + CSN current_value = expected_state_csn; + while (current_value == expected_state_csn && !TransactionLog::instance().isShuttingDown()) + { + csn.wait(current_value); + current_value = csn.load(); + } + return current_value != expected_state_csn; +} + void 
MergeTreeTransaction::checkIsNotCancelled() const { CSN c = csn.load(); diff --git a/src/Interpreters/MergeTreeTransaction.h b/src/Interpreters/MergeTreeTransaction.h index 6c34e8a8388..f466262cb2e 100644 --- a/src/Interpreters/MergeTreeTransaction.h +++ b/src/Interpreters/MergeTreeTransaction.h @@ -56,6 +56,8 @@ public: Float64 elapsedSeconds() const { return elapsed.elapsedSeconds(); } + bool waitStateChange(CSN expected_state_csn) const; + private: scope_guard beforeCommit(); void afterCommit(CSN assigned_csn) noexcept; diff --git a/src/Interpreters/MergeTreeTransactionHolder.cpp b/src/Interpreters/MergeTreeTransactionHolder.cpp index bf63a471282..2944fb78b76 100644 --- a/src/Interpreters/MergeTreeTransactionHolder.cpp +++ b/src/Interpreters/MergeTreeTransactionHolder.cpp @@ -53,7 +53,7 @@ void MergeTreeTransactionHolder::onDestroy() noexcept { try { - TransactionLog::instance().commitTransaction(txn); + TransactionLog::instance().commitTransaction(txn, /* throw_on_unknown_status */ false); return; } catch (...) 
diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index c1ecc9e0f21..6fe89a2b80e 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -53,6 +53,8 @@ TransactionLog::TransactionLog() zookeeper_path = global_context->getConfigRef().getString("transaction_log.zookeeper_path", "/clickhouse/txn"); zookeeper_path_log = zookeeper_path + "/log"; + fault_probability_before_commit = global_context->getConfigRef().getDouble("transaction_log.fault_probability_before_commit", 0); + fault_probability_after_commit = global_context->getConfigRef().getDouble("transaction_log.fault_probability_after_commit", 0); loadLogFromZooKeeper(); @@ -214,7 +216,10 @@ void TransactionLog::runUpdatingThread() { try { - log_updated_event->wait(); + /// Do not wait if we have some transactions to finalize + if (!unknown_state_list_loaded.empty()) + log_updated_event->wait(); + if (stop_flag.load()) return; @@ -229,7 +234,7 @@ void TransactionLog::runUpdatingThread() loadNewEntries(); removeOldEntries(); - if (connection_loss) + if (connection_loss || fault_probability_before_commit || fault_probability_after_commit) tryFinalizeUnknownStateTransactions(); } catch (const Coordination::Exception &) @@ -325,8 +330,22 @@ void TransactionLog::tryFinalizeUnknownStateTransactions() /// Check if transactions in unknown state were actually committed or not and finalize or rollback them. UnknownStateList list; { + /// We must be sure that the corresponding CSN entry is loaded from ZK. 
+ /// Otherwise we may accidentally rollback committed transaction in case of race condition like this: + /// - runUpdatingThread: loaded some entries, ready to call tryFinalizeUnknownStateTransactions() + /// - commitTransaction: creates CSN entry in the log (txn is committed) + /// - [session expires] + /// - commitTransaction: catches Coordination::Exception (maybe due to fault injection), appends txn to unknown_state_list + /// - runUpdatingThread: calls tryFinalizeUnknownStateTransactions(), fails to find CSN for this txn, rolls it back + /// So all CSN entries that might exist at the moment of appending txn to unknown_state_list + /// must be loaded from ZK before we start finalize that txn. + /// That's why we use two lists here: + /// 1. At first we put txn into unknown_state_list + /// 2. We move it to unknown_state_list_loaded when runUpdatingThread done at least one iteration + /// 3. Then we can safely finalize txns from unknown_state_list_loaded, because all required entries are loaded std::lock_guard lock{running_list_mutex}; std::swap(list, unknown_state_list); + std::swap(list, unknown_state_list_loaded); } for (auto & [txn, state_guard] : list) @@ -370,7 +389,7 @@ MergeTreeTransactionPtr TransactionLog::beginTransaction() return txn; } -CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn) +CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool throw_on_unknown_status) { /// Some precommit checks, may throw auto state_guard = txn->beforeCommit(); @@ -389,8 +408,22 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn) String csn_path_created; try { + if (unlikely(fault_probability_before_commit)) + { + std::bernoulli_distribution fault(fault_probability_before_commit); + if (fault(thread_local_rng)) + throw Coordination::Exception("Fault injected (before commit)", Coordination::Error::ZCONNECTIONLOSS); + } + /// Commit point csn_path_created = 
current_zookeeper->create(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential); + + if (unlikely(fault_probability_after_commit)) + { + std::bernoulli_distribution fault(fault_probability_after_commit); + if (fault(thread_local_rng)) + throw Coordination::Exception("Fault injected (after commit)", Coordination::Error::ZCONNECTIONLOSS); + } } catch (const Coordination::Exception & e) { @@ -404,7 +437,13 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn) unknown_state_list.emplace_back(txn.get(), std::move(state_guard)); } log_updated_event->set(); - throw Exception(ErrorCodes::UNKNOWN_STATUS_OF_TRANSACTION, "Connection lost on attempt to commit transaction {}, will finalize it later: {}", txn->tid, e.message()); + if (throw_on_unknown_status) + throw Exception(ErrorCodes::UNKNOWN_STATUS_OF_TRANSACTION, + "Connection lost on attempt to commit transaction {}, will finalize it later: {}", + txn->tid, e.message()); + + LOG_INFO(log, "Connection lost on attempt to commit transaction {}, will finalize it later: {}", txn->tid, e.message()); + return Tx::CommittingCSN; } NOEXCEPT_SCOPE; @@ -423,15 +462,6 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN { LOG_INFO(log, "Transaction {} committed with CSN={}", txn->tid, allocated_csn); tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, txn->tid, allocated_csn); - - /// Wait for committed changes to become actually visible, so the next transaction in this session will see the changes - /// TODO it's optional, add a setting for this - auto current_latest_snapshot = latest_snapshot.load(); - while (current_latest_snapshot < allocated_csn && !stop_flag) - { - latest_snapshot.wait(current_latest_snapshot); - current_latest_snapshot = latest_snapshot.load(); - } } else { @@ -459,6 +489,17 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN return allocated_csn; } 
+bool TransactionLog::waitForCSNLoaded(CSN csn) const +{ + auto current_latest_snapshot = latest_snapshot.load(); + while (current_latest_snapshot < csn && !stop_flag) + { + latest_snapshot.wait(current_latest_snapshot); + current_latest_snapshot = latest_snapshot.load(); + } + return csn <= current_latest_snapshot; +} + void TransactionLog::rollbackTransaction(const MergeTreeTransactionPtr & txn) noexcept { LOG_TRACE(log, "Rolling back transaction {}{}", txn->tid, diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h index 69a9c9c7b75..25892f77bd7 100644 --- a/src/Interpreters/TransactionLog.h +++ b/src/Interpreters/TransactionLog.h @@ -97,7 +97,8 @@ public: /// Tries to commit transaction. Returns Commit Sequence Number. /// Throw if transaction was concurrently killed or if some precommit check failed. /// May throw if ZK connection is lost. Transaction status is unknown in this case. - CSN commitTransaction(const MergeTreeTransactionPtr & txn); + /// Returns CommittingCSN if throw_on_unknown_status is false and connection was lost. + CSN commitTransaction(const MergeTreeTransactionPtr & txn, bool throw_on_unknown_status); /// Releases locks that that were acquired by transaction, releases snapshot, removes transaction from the list of active transactions. /// Normally it should not throw, but if it does for some reason (global memory limit exceeded, disk failure, etc) @@ -119,6 +120,12 @@ public: /// Returns copy of list of running transactions. TransactionsList getTransactionsList() const; + /// Waits for provided CSN (and all previous ones) to be loaded from the log. + /// Returns false if waiting was interrupted (e.g. by shutdown) + bool waitForCSNLoaded(CSN csn) const; + + bool isShuttingDown() const { return stop_flag.load(); } + private: void loadLogFromZooKeeper(); void runUpdatingThread(); @@ -166,6 +173,7 @@ private: /// If we lost connection on attempt to create csn- node then we don't know transaction's state. 
using UnknownStateList = std::vector>; UnknownStateList unknown_state_list; + UnknownStateList unknown_state_list_loaded; /// Ordered list of snapshots that are currently used by some transactions. Needed for background cleanup. std::list snapshots_in_use; @@ -182,6 +190,9 @@ private: std::atomic_bool stop_flag = false; ThreadFromGlobalPool updating_thread; + + Float64 fault_probability_before_commit = 0; + Float64 fault_probability_after_commit = 0; }; template diff --git a/src/Interpreters/TransactionVersionMetadata.cpp b/src/Interpreters/TransactionVersionMetadata.cpp index fd75faaf206..36a4fb9cc5b 100644 --- a/src/Interpreters/TransactionVersionMetadata.cpp +++ b/src/Interpreters/TransactionVersionMetadata.cpp @@ -391,7 +391,9 @@ void VersionMetadata::read(ReadBuffer & buf) { /// NOTE Metadata file may actually contain multiple creation TIDs, we need the last one. removal_tid = TransactionID::read(buf); - if (!removal_tid.isEmpty()) + if (removal_tid.isEmpty()) + removal_tid_lock = 0; + else removal_tid_lock = removal_tid.getHash(); } else if (name == REMOVAL_CSN_STR) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 2ddca8dce26..40fba34cd03 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1437,7 +1437,9 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const bool valid_removal_tid = version.removal_tid == file.removal_tid || version.removal_tid == Tx::PrehistoricTID; bool valid_creation_csn = version.creation_csn == file.creation_csn || version.creation_csn == Tx::RolledBackCSN; bool valid_removal_csn = version.removal_csn == file.removal_csn || version.removal_csn == Tx::PrehistoricCSN; - if (!valid_creation_tid || !valid_removal_tid || !valid_creation_csn || !valid_removal_csn) + bool valid_removal_tid_lock = (version.removal_tid.isEmpty() && version.removal_tid_lock == 0) + || (version.removal_tid_lock == 
version.removal_tid.getHash()); + if (!valid_creation_tid || !valid_removal_tid || !valid_creation_csn || !valid_removal_csn || !valid_removal_tid_lock) throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid version metadata file"); return true; } @@ -1445,7 +1447,8 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const { WriteBufferFromOwnString expected; version.write(expected); - tryLogCurrentException(storage.log, fmt::format("File {} contains:\n{}\nexpected:\n{}", version_file_name, content, expected.str())); + tryLogCurrentException(storage.log, fmt::format("File {} contains:\n{}\nexpected:\n{}\nlock: {}", + version_file_name, content, expected.str(), version.removal_tid_lock)); return false; } } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 6674de06c07..a8edb8dd78b 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -85,6 +85,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"visible", std::make_shared()}, {"creation_tid", getTransactionIDDataType()}, + {"removal_tid_lock", std::make_shared()}, {"removal_tid", getTransactionIDDataType()}, {"creation_csn", std::make_shared()}, {"removal_csn", std::make_shared()}, @@ -295,6 +296,8 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(get_tid_as_field(part->version.creation_tid)); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->version.removal_tid_lock.load(std::memory_order_relaxed)); if (columns_mask[src_index++]) columns[res_index++]->insert(get_tid_as_field(part->version.getRemovalTID())); if (columns_mask[src_index++]) diff --git a/tests/config/config.d/transactions.xml b/tests/config/config.d/transactions.xml index 19810986ea1..9948b1f1865 100644 --- a/tests/config/config.d/transactions.xml +++ b/tests/config/config.d/transactions.xml @@ -10,4 +10,12 @@ 7500 + + /test/clickhouse/txn 
+ + 0.0 + + + 0.01 + diff --git a/tests/queries/0_stateless/01133_begin_commit_race.sh b/tests/queries/0_stateless/01133_begin_commit_race.sh index 29e7ef423a1..f64570950c7 100755 --- a/tests/queries/0_stateless/01133_begin_commit_race.sh +++ b/tests/queries/0_stateless/01133_begin_commit_race.sh @@ -14,23 +14,25 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE mt (n Int64) ENGINE=MergeTree ORDER BY function begin_commit_readonly() { $CLICKHOUSE_CLIENT --multiquery --query " + SET wait_changes_become_visible_after_commit_mode='wait'; BEGIN TRANSACTION; - COMMIT;"; + COMMIT;" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_STATUS_OF_TRANSACTION } function begin_rollback_readonly() { - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --wait_changes_become_visible_after_commit_mode=wait_unknown --multiquery --query " BEGIN TRANSACTION; - ROLLBACK;"; + SET TRANSACTION SNAPSHOT 42; + ROLLBACK;" } function begin_insert_commit() { - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --wait_changes_become_visible_after_commit_mode=async --multiquery --query " BEGIN TRANSACTION; INSERT INTO mt VALUES ($RANDOM); - COMMIT;"; + COMMIT;" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_STATUS_OF_TRANSACTION } function introspection() diff --git a/tests/queries/0_stateless/01172_transaction_counters.sql b/tests/queries/0_stateless/01172_transaction_counters.sql index 5431673fd62..b84a7b25c47 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.sql +++ b/tests/queries/0_stateless/01172_transaction_counters.sql @@ -42,7 +42,13 @@ rollback; system flush logs; select indexOf((select arraySort(groupUniqArray(tid)) from system.transactions_info_log where database=currentDatabase() and table='txn_counters'), tid), - (toDecimal64(now64(6), 6) - toDecimal64(event_time, 6)) < 100, type, thread_id!=0, length(query_id)=length(queryID()), tid_hash!=0, csn=0, part + (toDecimal64(now64(6), 6) - toDecimal64(event_time, 6)) < 100, + type, + thread_id!=0, + 
length(query_id)=length(queryID()) or type='Commit' and query_id='', -- ignore fault injection after commit + tid_hash!=0, + csn=0, + part from system.transactions_info_log where tid in (select tid from system.transactions_info_log where database=currentDatabase() and table='txn_counters' and not (tid.1=1 and tid.2=1)) or (database=currentDatabase() and table='txn_counters') order by event_time; From 42439aeb3cd3f49a2861ea56d4cf2399f8f56076 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 20 May 2022 22:42:48 +0200 Subject: [PATCH 347/615] Improve performance of number comparison functions --- src/Functions/FunctionsComparison.h | 44 +++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 48170d6f564..2911fb5d004 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -84,8 +85,8 @@ struct NumComparisonImpl using ContainerA = PaddedPODArray; using ContainerB = PaddedPODArray; - /// If you don't specify NO_INLINE, the compiler will inline this function, but we don't need this as this function contains tight loop inside. - static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray & c) + MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorVectorImpl, + MULTITARGET_FH(static void), /*vectorVectorImpl*/ MULTITARGET_FB((const ContainerA & a, const ContainerB & b, PaddedPODArray & c) /// NOLINT { /** GCC 4.8.2 vectorizes a loop only if it is written in this form. 
* In this case, if you loop through the array index (the code will look simpler), @@ -105,9 +106,29 @@ struct NumComparisonImpl ++b_pos; ++c_pos; } + })) + + static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray & c) + { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + { + vectorVectorImplAVX2(a, b, c); + return; + } + else if (isArchSupported(TargetArch::SSE42)) + { + vectorVectorImplSSE42(a, b, c); + return; + } +#endif + + vectorVectorImpl(a, b, c); } - static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray & c) + + MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorConstantImpl, + MULTITARGET_FH(static void), /*vectorConstantImpl*/ MULTITARGET_FB((const ContainerA & a, B b, PaddedPODArray & c) /// NOLINT { size_t size = a.size(); const A * __restrict a_pos = a.data(); @@ -120,6 +141,23 @@ struct NumComparisonImpl ++a_pos; ++c_pos; } + })) + + static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray & c) { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + { + vectorConstantImplAVX2(a, b, c); + return; + } + else if (isArchSupported(TargetArch::SSE42)) + { + vectorConstantImplSSE42(a, b, c); + return; + } +#endif + + vectorConstantImpl(a, b, c); } static void constantVector(A a, const ContainerB & b, PaddedPODArray & c) From 4859108febc4f10f3012f8fcbe08a02d71980d5b Mon Sep 17 00:00:00 2001 From: ndchikin Date: Fri, 20 May 2022 20:49:10 +0000 Subject: [PATCH 348/615] WindowTransform::moveRowNumber fix --- src/Processors/Transforms/WindowTransform.cpp | 2 +- .../0_stateless/02306_window_move_row_number_fix.reference | 1 + tests/queries/0_stateless/02306_window_move_row_number_fix.sql | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02306_window_move_row_number_fix.reference create mode 100644 tests/queries/0_stateless/02306_window_move_row_number_fix.sql diff --git 
a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 45993be70d9..82e7cd48085 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -425,7 +425,7 @@ auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int64_t offset) { RowNumber x = _x; - if (offset > 0) + if (offset > 0 && x != blocksEnd()) { for (;;) { diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.reference b/tests/queries/0_stateless/02306_window_move_row_number_fix.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql new file mode 100644 index 00000000000..96dd8f6176b --- /dev/null +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -0,0 +1 @@ +SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) From d4974ddaf0dcb44dcef23e585010e46e60ef8637 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 20 May 2022 22:59:55 +0200 Subject: [PATCH 349/615] fix test --- .../queries/0_stateless/02117_show_create_table_system.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index ad18e38adcc..d4ada9ba5c8 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -485,6 +485,7 @@ CREATE TABLE system.parts `projections` Array(String), `visible` UInt8, `creation_tid` Tuple(UInt64, UInt64, UUID), + `removal_tid_lock` UInt64, `removal_tid` Tuple(UInt64, UInt64, UUID), `creation_csn` UInt64, `removal_csn` UInt64, From 
6e3f741a25794906c6e74aad16d1339d7d03da14 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 20 May 2022 17:21:04 -0400 Subject: [PATCH 350/615] allow SALT only for SHA256_HASH --- src/Parsers/Access/ParserCreateUserQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index f5a1c6869d2..eda2505f88d 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -120,7 +120,7 @@ namespace return false; value = ast->as().value.safeGet(); - if (expect_hash) + if (expect_hash && type == AuthenticationType::SHA256_PASSWORD) { if (ParserKeyword{"SALT"}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected)) { From c3d8668eb66b1a3c2e39a37c1d607964149749bf Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Fri, 20 May 2022 17:34:14 -0400 Subject: [PATCH 351/615] add ClickHouse Keeper to docs --- docs/en/operations/clickhouse-keeper.md | 26 +++++++++---------- docs/en/operations/configuration-files.md | 2 +- docs/en/operations/system-tables/zookeeper.md | 6 ++--- docs/en/operations/tips.md | 6 ++--- .../operations/utilities/clickhouse-copier.md | 16 ++++++------ 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 44f6a57f6d4..7dbe0601343 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -5,15 +5,15 @@ sidebar_label: ClickHouse Keeper # ClickHouse Keeper {#clickHouse-keeper} -ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper. 
+ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper. ## Implementation details {#implementation-details} -ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, has quite a simple and powerful data model. ZooKeeper's coordination algorithm called ZAB (ZooKeeper Atomic Broadcast) doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper ClickHouse Keeper is written in C++ and uses [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows to have linearizability for reads and writes, has several open-source implementations in different languages. +ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZooKeeper Atomic Broadcast (ZAB), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes, and has several open-source implementations in different languages. -By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but `clickhouse-keeper-converter` tool allows to convert ZooKeeper data to ClickHouse Keeper snapshot. 
Interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so mixed ZooKeeper / ClickHouse Keeper cluster is impossible. +By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but the `clickhouse-keeper-converter` tool enables the conversion of ZooKeeper data to ClickHouse Keeper snapshots. The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible. -ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. Digest authentication scheme uses pair `username:password`. Password is encoded in Base64. +ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. The digest authentication scheme uses the pair `username:password`, the password is encoded in Base64. :::note External integrations are not supported. @@ -21,25 +21,25 @@ External integrations are not supported. ## Configuration {#configuration} -ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server, but in both cases configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is ``. 
Keeper configuration has the following parameters: +ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server. In both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is ``. Keeper configuration has the following parameters: - `tcp_port` — Port for a client to connect (default for ZooKeeper is `2181`). - `tcp_port_secure` — Secure port for an SSL connection between client and keeper-server. - `server_id` — Unique server id, each participant of the ClickHouse Keeper cluster must have a unique number (1, 2, 3, and so on). -- `log_storage_path` — Path to coordination logs, better to store logs on the non-busy device (same for ZooKeeper). +- `log_storage_path` — Path to coordination logs, just like ZooKeeper it is best to store logs on non-busy nodes. - `snapshot_storage_path` — Path to coordination snapshots. Other common parameters are inherited from the ClickHouse server config (`listen_host`, `logger`, and so on). -Internal coordination settings are located in `.` section: +Internal coordination settings are located in the `.` section: - `operation_timeout_ms` — Timeout for a single client operation (ms) (default: 10000). - `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000). - `session_timeout_ms` — Max timeout for client session (ms) (default: 100000). -- `dead_session_check_period_ms` — How often ClickHouse Keeper check dead sessions and remove them (ms) (default: 500). +- `dead_session_check_period_ms` — How often ClickHouse Keeper checks for dead sessions and removes them (ms) (default: 500). - `heart_beat_interval_ms` — How often a ClickHouse Keeper leader will send heartbeats to followers (ms) (default: 500). -- `election_timeout_lower_bound_ms` — If the follower didn't receive heartbeats from the leader in this interval, then it can initiate leader election (default: 1000). 
-- `election_timeout_upper_bound_ms` — If the follower didn't receive heartbeats from the leader in this interval, then it must initiate leader election (default: 2000). +- `election_timeout_lower_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it can initiate leader election (default: 1000). +- `election_timeout_upper_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it must initiate leader election (default: 2000). - `rotate_log_storage_interval` — How many log records to store in a single file (default: 100000). - `reserved_log_items` — How many coordination log records to store before compaction (default: 100000). - `snapshot_distance` — How often ClickHouse Keeper will create new snapshots (in the number of records in logs) (default: 100000). @@ -55,7 +55,7 @@ Internal coordination settings are located in `..` section and contain servers description. +Quorum configuration is located in the `.` section and contain servers description. The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. The parameter can be set `true` if SSL connection is required for internal communication between nodes, or left unspecified otherwise. @@ -66,7 +66,7 @@ The main parameters for each `` are: - `port` — Port where this server listens for connections. :::note -In the case of a change in the topology of your ClickHouse Keeper cluster (eg. replacing a server), please make sure to keep the mapping `server_id` to `hostname` consistent and avoid shuffling or reusing an existing `server_id` for different servers (eg. 
it can happen if your rely on automation scripts to deploy ClickHouse Keeper) +In the case of a change in the topology of your ClickHouse Keeper cluster (e.g., replacing a server), please make sure to keep the mapping of `server_id` to `hostname` consistent and avoid shuffling or reusing an existing `server_id` for different servers (e.g., it can happen if your rely on automation scripts to deploy ClickHouse Keeper) ::: Examples of configuration for quorum with three nodes can be found in [integration tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) with `test_keeper_` prefix. Example configuration for server #1: @@ -112,7 +112,7 @@ ClickHouse Keeper is bundled into the ClickHouse server package, just add config clickhouse-keeper --config /etc/your_path_to_config/config.xml ``` -If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as argument: +If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as an argument to `clickhouse`: ```bash clickhouse keeper --config /etc/your_path_to_config/config.xml diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 582e90544e0..4a5431fa57c 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -34,7 +34,7 @@ You can also declare attributes as coming from environment variables by using `f The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. 
If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). -If you want to replace an entire element with a substitution use `include` as element name. +If you want to replace an entire element with a substitution use `include` as the element name. XML substitution example: diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index e8232483f6f..31625b0fed6 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -1,7 +1,7 @@ # zookeeper {#system-zookeeper} -The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config. -The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for. +The table does not exist unless ClickHouse Keeper or ZooKeeper is configured. Th `system.zookeeper` exposes data from the Keeper cluster defined in the config. +The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children that you want to get data for. The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node. To output data for all root nodes, write path = ‘/’. @@ -9,7 +9,7 @@ If the path specified in ‘path’ does not exist, an exception will be thrown. The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` node. If in the specified ‘path’ collection has does not exist path, an exception will be thrown. 
-It can be used to do a batch of ZooKeeper path queries. +It can be used to do a batch of Keeper path queries. Columns: diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index c727c636579..26dc59d72ba 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -118,11 +118,11 @@ in XML configuration. This is important for ClickHouse to be able to get correct information with `cpuid` instruction. Otherwise you may get `Illegal instruction` crashes when hypervisor is run on old CPU models. -## ZooKeeper {#zookeeper} +## ClickHouse Keeper and ZooKeeper {#zookeeper} -You are probably already using ZooKeeper for other purposes. You can use the same installation of ZooKeeper, if it isn’t already overloaded. +ClickHouse Keeper is recommended to replace ZooKeeper for ClickHouse clusters. See the documentation for [ClickHouse Keeper](clickhouse-keeper.md) -It’s best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated. +If you would like to continue using ZooKeeper then it is best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated. You should never use manually written scripts to transfer data between different ZooKeeper clusters, because the result will be incorrect for sequential nodes. Never use the “zkcopy” utility for the same reason: https://github.com/ksprojects/zkcopy/issues/15 diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index f152c177992..f3806d1afbc 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -11,11 +11,11 @@ Copies data from the tables in one cluster to tables in another (or the same) cl To get a consistent copy, the data in the source tables and partitions should not change during the entire process. 
::: -You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ZooKeeper is used for syncing the processes. +You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes. After starting, `clickhouse-copier`: -- Connects to ZooKeeper and receives: +- Connects to ClickHouse Keeper and receives: - Copying jobs. - The state of the copying jobs. @@ -24,7 +24,7 @@ After starting, `clickhouse-copier`: Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary. -`clickhouse-copier` tracks the changes in ZooKeeper and applies them on the fly. +`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly. To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located. @@ -33,19 +33,19 @@ To reduce network traffic, we recommend running `clickhouse-copier` on the same The utility should be run manually: ``` bash -$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir +$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir ``` Parameters: - `daemon` — Starts `clickhouse-copier` in daemon mode. -- `config` — The path to the `zookeeper.xml` file with the parameters for the connection to ZooKeeper. -- `task-path` — The path to the ZooKeeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. -- `task-file` — Optional path to file with task configuration for initial upload to ZooKeeper. +- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. +- `task-path` — The path to the ClickHouse Keeper node. 
This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. +- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. - `task-upload-force` — Force upload `task-file` even if node already exists. - `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. -## Format of Zookeeper.xml {#format-of-zookeeper-xml} +## Format of keeper.xml {#format-of-zookeeper-xml} ``` xml From 93e0e72fcbea5fea5cdb890fd369a4a4fa319cdd Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Fri, 20 May 2022 17:43:23 -0400 Subject: [PATCH 352/615] typo --- docs/en/operations/system-tables/zookeeper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index 31625b0fed6..ec67d2780e3 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -1,6 +1,6 @@ # zookeeper {#system-zookeeper} -The table does not exist unless ClickHouse Keeper or ZooKeeper is configured. Th `system.zookeeper` exposes data from the Keeper cluster defined in the config. +The table does not exist unless ClickHouse Keeper or ZooKeeper is configured. The `system.zookeeper` table exposes data from the Keeper cluster defined in the config. The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children that you want to get data for. The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node. 
From 4c13b52b6a51e923b7cd7ec88013a74d7bc831ae Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 20 May 2022 19:59:17 -0400 Subject: [PATCH 353/615] scope guard resources --- .../System/StorageSystemCertificates.cpp | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index 65440c74471..6e37046e2b4 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -45,6 +45,10 @@ static std::unordered_set parse_dir(const std::string & dir) static void populateTable(const X509 * cert, MutableColumns & res_columns, const std::string & path, bool def) { BIO * b = BIO_new(BIO_s_mem()); + SCOPE_EXIT( + { + BIO_free(b); + }); size_t col = 0; res_columns[col++]->insert(X509_get_version(cert) + 1); @@ -53,11 +57,14 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const char buf[1024] = {0}; const ASN1_INTEGER * sn = cert->cert_info->serialNumber; BIGNUM * bnsn = ASN1_INTEGER_to_BN(sn, nullptr); + SCOPE_EXIT( + { + BN_free(bnsn); + }); if (BN_print(b, bnsn) > 0 && BIO_read(b, buf, sizeof(buf)) > 0) res_columns[col]->insert(buf); else res_columns[col]->insertDefault(); - BN_free(bnsn); } ++col; @@ -79,8 +86,11 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const char * issuer = X509_NAME_oneline(cert->cert_info->issuer, nullptr, 0); if (issuer) { + SCOPE_EXIT( + { + OPENSSL_free(issuer); + }); res_columns[col]->insert(issuer); - OPENSSL_free(issuer); } else res_columns[col]->insertDefault(); @@ -107,8 +117,11 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const char * subject = X509_NAME_oneline(cert->cert_info->subject, nullptr, 0); if (subject) { + SCOPE_EXIT( + { + OPENSSL_free(subject); + }); res_columns[col]->insert(subject); - OPENSSL_free(subject); } else 
res_columns[col]->insertDefault(); @@ -133,8 +146,6 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const res_columns[col++]->insert(path); res_columns[col++]->insert(def); - - BIO_free(b); } static void enumCertificates(const std::string & dir, bool def, MutableColumns & res_columns) From 1b453f517d2263c4b431865fcbf0d11313d61324 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 21 May 2022 02:32:35 +0200 Subject: [PATCH 354/615] fix --- src/Interpreters/TransactionLog.cpp | 8 ++++---- src/Interpreters/TransactionLog.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 6fe89a2b80e..699190e2d6f 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -353,7 +353,7 @@ void TransactionLog::tryFinalizeUnknownStateTransactions() /// CSNs must be already loaded, only need to check if the corresponding mapping exists. if (auto csn = getCSN(txn->tid)) { - finalizeCommittedTransaction(txn, csn); + finalizeCommittedTransaction(txn, csn, state_guard); } else { @@ -452,10 +452,10 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool allocated_csn = deserializeCSN(csn_path_created.substr(zookeeper_path_log.size() + 1)); } - return finalizeCommittedTransaction(txn.get(), allocated_csn); + return finalizeCommittedTransaction(txn.get(), allocated_csn, state_guard); } -CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN allocated_csn) noexcept +CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN allocated_csn, scope_guard & state_guard) noexcept { chassert(!allocated_csn == txn->isReadOnly()); if (allocated_csn) @@ -472,10 +472,10 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN /// Write allocated CSN, so we will be able to cleanup log in ZK. This method is noexcept. 
txn->afterCommit(allocated_csn); + state_guard = {}; { /// Finally we can remove transaction from the list and release the snapshot - MergeTreeTransactionPtr txn_ptr; std::lock_guard lock{running_list_mutex}; snapshots_in_use.erase(txn->snapshot_in_use_it); bool removed = running_list.erase(txn->tid.getHash()); diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h index 25892f77bd7..a0268ce9b88 100644 --- a/src/Interpreters/TransactionLog.h +++ b/src/Interpreters/TransactionLog.h @@ -134,7 +134,7 @@ private: void loadNewEntries(); void removeOldEntries(); - CSN finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN allocated_csn) noexcept; + CSN finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN allocated_csn, scope_guard & state_guard) noexcept; void tryFinalizeUnknownStateTransactions(); From 58f4a86ec7cc644ea549652ea07456467f56f6f0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 16 May 2022 20:43:55 +0200 Subject: [PATCH 355/615] Rework notifications used in access management. 
--- programs/server/Server.cpp | 5 +- src/Access/AccessChangesNotifier.cpp | 122 +++++++++++ src/Access/AccessChangesNotifier.h | 73 +++++++ src/Access/AccessControl.cpp | 93 +++++---- src/Access/AccessControl.h | 38 +++- src/Access/ContextAccess.cpp | 15 ++ src/Access/ContextAccess.h | 2 + src/Access/DiskAccessStorage.cpp | 123 +++-------- src/Access/DiskAccessStorage.h | 20 +- src/Access/EnabledRoles.cpp | 54 ++--- src/Access/EnabledRoles.h | 18 +- src/Access/IAccessStorage.cpp | 28 --- src/Access/IAccessStorage.h | 34 +-- src/Access/LDAPAccessStorage.cpp | 40 +--- src/Access/LDAPAccessStorage.h | 10 +- src/Access/MemoryAccessStorage.cpp | 121 ++--------- src/Access/MemoryAccessStorage.h | 23 +-- src/Access/MultipleAccessStorage.cpp | 164 ++------------- src/Access/MultipleAccessStorage.h | 11 +- src/Access/ReplicatedAccessStorage.cpp | 193 ++++++------------ src/Access/ReplicatedAccessStorage.h | 40 ++-- src/Access/RoleCache.cpp | 13 +- src/Access/RoleCache.h | 4 +- src/Access/UsersConfigAccessStorage.cpp | 35 +--- src/Access/UsersConfigAccessStorage.h | 15 +- .../tests/gtest_replicated_access_storage.cpp | 19 +- src/Interpreters/Context.cpp | 2 - 27 files changed, 561 insertions(+), 754 deletions(-) create mode 100644 src/Access/AccessChangesNotifier.cpp create mode 100644 src/Access/AccessChangesNotifier.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 752ff51ba4f..e5334d32a9f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1314,7 +1314,7 @@ int Server::main(const std::vector & /*args*/) global_context->setConfigReloadCallback([&]() { main_config_reloader->reload(); - access_control.reloadUsersConfigs(); + access_control.reload(); }); /// Limit on total number of concurrently executed queries. @@ -1406,6 +1406,7 @@ int Server::main(const std::vector & /*args*/) /// Stop reloading of the main config. 
This must be done before `global_context->shutdown()` because /// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart. main_config_reloader.reset(); + access_control.stopPeriodicReloading(); async_metrics.stop(); @@ -1629,7 +1630,7 @@ int Server::main(const std::vector & /*args*/) buildLoggers(config(), logger()); main_config_reloader->start(); - access_control.startPeriodicReloadingUsersConfigs(); + access_control.startPeriodicReloading(); if (dns_cache_updater) dns_cache_updater->start(); diff --git a/src/Access/AccessChangesNotifier.cpp b/src/Access/AccessChangesNotifier.cpp new file mode 100644 index 00000000000..05516285efb --- /dev/null +++ b/src/Access/AccessChangesNotifier.cpp @@ -0,0 +1,122 @@ +#include +#include + + +namespace DB +{ + +AccessChangesNotifier::AccessChangesNotifier() : handlers(std::make_shared()) +{ +} + +AccessChangesNotifier::~AccessChangesNotifier() = default; + +void AccessChangesNotifier::onEntityAdded(const UUID & id, const AccessEntityPtr & new_entity) +{ + std::lock_guard lock{queue_mutex}; + Event event; + event.id = id; + event.entity = new_entity; + event.type = new_entity->getType(); + queue.push(std::move(event)); +} + +void AccessChangesNotifier::onEntityUpdated(const UUID & id, const AccessEntityPtr & changed_entity) +{ + std::lock_guard lock{queue_mutex}; + Event event; + event.id = id; + event.entity = changed_entity; + event.type = changed_entity->getType(); + queue.push(std::move(event)); +} + +void AccessChangesNotifier::onEntityRemoved(const UUID & id, AccessEntityType type) +{ + std::lock_guard lock{queue_mutex}; + Event event; + event.id = id; + event.type = type; + queue.push(std::move(event)); +} + +scope_guard AccessChangesNotifier::subscribeForChanges(AccessEntityType type, const OnChangedHandler & handler) +{ + std::lock_guard lock{handlers->mutex}; + auto & list = handlers->by_type[static_cast(type)]; + list.push_back(handler); + auto handler_it = 
std::prev(list.end()); + + return [handlers=handlers, type, handler_it] + { + std::lock_guard lock2{handlers->mutex}; + auto & list2 = handlers->by_type[static_cast(type)]; + list2.erase(handler_it); + }; +} + +scope_guard AccessChangesNotifier::subscribeForChanges(const UUID & id, const OnChangedHandler & handler) +{ + std::lock_guard lock{handlers->mutex}; + auto it = handlers->by_id.emplace(id, std::list{}).first; + auto & list = it->second; + list.push_back(handler); + auto handler_it = std::prev(list.end()); + + return [handlers=handlers, it, handler_it] + { + std::lock_guard lock2{handlers->mutex}; + auto & list2 = it->second; + list2.erase(handler_it); + if (list2.empty()) + handlers->by_id.erase(it); + }; +} + + +scope_guard AccessChangesNotifier::subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) +{ + scope_guard subscriptions; + for (const auto & id : ids) + subscriptions.join(subscribeForChanges(id, handler)); + return subscriptions; +} + +void AccessChangesNotifier::sendNotifications() +{ + /// Only one thread can send notification at any time. + std::lock_guard sending_notifications_lock{sending_notifications}; + + std::unique_lock queue_lock{queue_mutex}; + while (!queue.empty()) + { + auto event = std::move(queue.front()); + queue.pop(); + queue_lock.unlock(); + + std::vector current_handlers; + { + std::lock_guard handlers_lock{handlers->mutex}; + boost::range::copy(handlers->by_type[static_cast(event.type)], std::back_inserter(current_handlers)); + auto it = handlers->by_id.find(event.id); + if (it != handlers->by_id.end()) + boost::range::copy(it->second, std::back_inserter(current_handlers)); + } + + for (const auto & handler : current_handlers) + { + try + { + handler(event.id, event.entity); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + queue_lock.lock(); + } +} + +} diff --git a/src/Access/AccessChangesNotifier.h b/src/Access/AccessChangesNotifier.h new file mode 100644 index 00000000000..46a7cdf26b6 --- /dev/null +++ b/src/Access/AccessChangesNotifier.h @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +/// Helper class implementing subscriptions and notifications in access management. +class AccessChangesNotifier +{ +public: + AccessChangesNotifier(); + ~AccessChangesNotifier(); + + using OnChangedHandler + = std::function; + + /// Subscribes for all changes. + /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only). + scope_guard subscribeForChanges(AccessEntityType type, const OnChangedHandler & handler); + + template + scope_guard subscribeForChanges(OnChangedHandler handler) + { + return subscribeForChanges(EntityClassT::TYPE, handler); + } + + /// Subscribes for changes of a specific entry. + /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only). + scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler); + scope_guard subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler); + + /// Called by access storages after a new access entity has been added. + void onEntityAdded(const UUID & id, const AccessEntityPtr & new_entity); + + /// Called by access storages after an access entity has been changed. + void onEntityUpdated(const UUID & id, const AccessEntityPtr & changed_entity); + + /// Called by access storages after an access entity has been removed. + void onEntityRemoved(const UUID & id, AccessEntityType type); + + /// Sends notifications to subscribers about changes in access entities + /// (added with previous calls onEntityAdded(), onEntityUpdated(), onEntityRemoved()). 
+ void sendNotifications(); + +private: + struct Handlers + { + std::unordered_map> by_id; + std::list by_type[static_cast(AccessEntityType::MAX)]; + std::mutex mutex; + }; + + /// shared_ptr is here for safety because AccessChangesNotifier can be destroyed before all subscriptions are removed. + std::shared_ptr handlers; + + struct Event + { + UUID id; + AccessEntityPtr entity; + AccessEntityType type; + }; + std::queue queue; + std::mutex queue_mutex; + std::mutex sending_notifications; +}; + +} diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index d74695e645e..c602e01623c 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -142,7 +143,8 @@ AccessControl::AccessControl() quota_cache(std::make_unique(*this)), settings_profiles_cache(std::make_unique(*this)), external_authenticators(std::make_unique()), - custom_settings_prefixes(std::make_unique()) + custom_settings_prefixes(std::make_unique()), + changes_notifier(std::make_unique()) { } @@ -231,35 +233,6 @@ void AccessControl::addUsersConfigStorage( LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } -void AccessControl::reloadUsersConfigs() -{ - auto storages = getStoragesPtr(); - for (const auto & storage : *storages) - { - if (auto users_config_storage = typeid_cast>(storage)) - users_config_storage->reload(); - } -} - -void AccessControl::startPeriodicReloadingUsersConfigs() -{ - auto storages = getStoragesPtr(); - for (const auto & storage : *storages) - { - if (auto users_config_storage = typeid_cast>(storage)) - users_config_storage->startPeriodicReloading(); - } -} - -void AccessControl::stopPeriodicReloadingUsersConfigs() -{ - auto storages = getStoragesPtr(); - for (const auto & storage : *storages) - { - if (auto users_config_storage = typeid_cast>(storage)) - 
users_config_storage->stopPeriodicReloading(); - } -} void AccessControl::addReplicatedStorage( const String & storage_name_, @@ -272,10 +245,9 @@ void AccessControl::addReplicatedStorage( if (auto replicated_storage = typeid_cast>(storage)) return; } - auto new_storage = std::make_shared(storage_name_, zookeeper_path_, get_zookeeper_function_); + auto new_storage = std::make_shared(storage_name_, zookeeper_path_, get_zookeeper_function_, *changes_notifier); addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName()); - new_storage->startup(); } void AccessControl::addDiskStorage(const String & directory_, bool readonly_) @@ -298,7 +270,7 @@ void AccessControl::addDiskStorage(const String & storage_name_, const String & } } } - auto new_storage = std::make_shared(storage_name_, directory_, readonly_); + auto new_storage = std::make_shared(storage_name_, directory_, readonly_, *changes_notifier); addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } @@ -312,7 +284,7 @@ void AccessControl::addMemoryStorage(const String & storage_name_) if (auto memory_storage = typeid_cast>(storage)) return; } - auto new_storage = std::make_shared(storage_name_); + auto new_storage = std::make_shared(storage_name_, *changes_notifier); addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName()); } @@ -320,7 +292,7 @@ void AccessControl::addMemoryStorage(const String & storage_name_) void AccessControl::addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) { - auto new_storage = std::make_shared(storage_name_, this, config_, prefix_); + auto new_storage = std::make_shared(storage_name_, *this, config_, prefix_); 
addStorage(new_storage); LOG_DEBUG(getLogger(), "Added {} access storage '{}', LDAP server name: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getLDAPServerName()); } @@ -423,6 +395,57 @@ void AccessControl::addStoragesFromMainConfig( } +void AccessControl::reload() +{ + MultipleAccessStorage::reload(); + changes_notifier->sendNotifications(); +} + +scope_guard AccessControl::subscribeForChanges(AccessEntityType type, const OnChangedHandler & handler) const +{ + return changes_notifier->subscribeForChanges(type, handler); +} + +scope_guard AccessControl::subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const +{ + return changes_notifier->subscribeForChanges(id, handler); +} + +scope_guard AccessControl::subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const +{ + return changes_notifier->subscribeForChanges(ids, handler); +} + +std::optional AccessControl::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) +{ + auto id = MultipleAccessStorage::insertImpl(entity, replace_if_exists, throw_if_exists); + if (id) + changes_notifier->sendNotifications(); + return id; +} + +bool AccessControl::removeImpl(const UUID & id, bool throw_if_not_exists) +{ + bool removed = MultipleAccessStorage::removeImpl(id, throw_if_not_exists); + if (removed) + changes_notifier->sendNotifications(); + return removed; +} + +bool AccessControl::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) +{ + bool updated = MultipleAccessStorage::updateImpl(id, update_func, throw_if_not_exists); + if (updated) + changes_notifier->sendNotifications(); + return updated; +} + +AccessChangesNotifier & AccessControl::getChangesNotifier() +{ + return *changes_notifier; +} + + UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const { try diff --git a/src/Access/AccessControl.h 
b/src/Access/AccessControl.h index 4ee29aa20c7..cbc71241316 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -3,8 +3,8 @@ #include #include #include +#include #include -#include #include @@ -40,6 +40,7 @@ class SettingsProfilesCache; class SettingsProfileElements; class ClientInfo; class ExternalAuthenticators; +class AccessChangesNotifier; struct Settings; @@ -50,6 +51,7 @@ public: AccessControl(); ~AccessControl() override; + /// Initializes access storage (user directories). void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); @@ -74,9 +76,6 @@ public: const String & preprocessed_dir_, const zkutil::GetZooKeeper & get_zookeeper_function_ = {}); - void reloadUsersConfigs(); - void startPeriodicReloadingUsersConfigs(); - void stopPeriodicReloadingUsersConfigs(); /// Loads access entities from the directory on the local disk. /// Use that directory to keep created users/roles/etc. void addDiskStorage(const String & directory_, bool readonly_ = false); @@ -106,6 +105,26 @@ public: const String & config_path, const zkutil::GetZooKeeper & get_zookeeper_function); + /// Reloads and updates entities in this storage. This function is used to implement SYSTEM RELOAD CONFIG. + void reload() override; + + using OnChangedHandler = std::function; + + /// Subscribes for all changes. + /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only). + scope_guard subscribeForChanges(AccessEntityType type, const OnChangedHandler & handler) const; + + template + scope_guard subscribeForChanges(OnChangedHandler handler) const { return subscribeForChanges(EntityClassT::TYPE, handler); } + + /// Subscribes for changes of a specific entry. + /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only). 
+ scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const; + scope_guard subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const; + + UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; + void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); + /// Sets the default profile's name. /// The default profile's settings are always applied before any other profile's. void setDefaultProfileName(const String & default_profile_name); @@ -135,9 +154,6 @@ public: void setOnClusterQueriesRequireClusterGrant(bool enable) { on_cluster_queries_require_cluster_grant = enable; } bool doesOnClusterQueriesRequireClusterGrant() const { return on_cluster_queries_require_cluster_grant; } - UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; - void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); - std::shared_ptr getContextAccess( const UUID & user_id, const std::vector & current_roles, @@ -178,10 +194,17 @@ public: const ExternalAuthenticators & getExternalAuthenticators() const; + /// Gets manager of notifications. 
+ AccessChangesNotifier & getChangesNotifier(); + private: class ContextAccessCache; class CustomSettingsPrefixes; + std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; + bool removeImpl(const UUID & id, bool throw_if_not_exists) override; + bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; + std::unique_ptr context_access_cache; std::unique_ptr role_cache; std::unique_ptr row_policy_cache; @@ -189,6 +212,7 @@ private: std::unique_ptr settings_profiles_cache; std::unique_ptr external_authenticators; std::unique_ptr custom_settings_prefixes; + std::unique_ptr changes_notifier; std::atomic_bool allow_plaintext_password = true; std::atomic_bool allow_no_password = true; std::atomic_bool users_without_row_policies_can_read_rows = false; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 28926310c20..46fdba9d65e 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -149,6 +149,21 @@ ContextAccess::ContextAccess(const AccessControl & access_control_, const Params } +ContextAccess::~ContextAccess() +{ + enabled_settings.reset(); + enabled_quota.reset(); + enabled_row_policies.reset(); + access_with_implicit.reset(); + access.reset(); + roles_info.reset(); + subscription_for_roles_changes.reset(); + enabled_roles.reset(); + subscription_for_user_change.reset(); + user.reset(); +} + + void ContextAccess::initialize() { std::lock_guard lock{mutex}; diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 5742b6a3222..f1c215a4029 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -155,6 +155,8 @@ public: /// without any limitations. This is used for the global context. 
static std::shared_ptr getFullAccess(); + ~ContextAccess(); + private: friend class AccessControl; ContextAccess() {} /// NOLINT diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 95d58f9da87..57e09d40b35 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -164,13 +165,8 @@ namespace } -DiskAccessStorage::DiskAccessStorage(const String & directory_path_, bool readonly_) - : DiskAccessStorage(STORAGE_TYPE, directory_path_, readonly_) -{ -} - -DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_) - : IAccessStorage(storage_name_) +DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_, AccessChangesNotifier & changes_notifier_) + : IAccessStorage(storage_name_), changes_notifier(changes_notifier_) { directory_path = makeDirectoryPathCanonical(directory_path_); readonly = readonly_; @@ -199,7 +195,15 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String DiskAccessStorage::~DiskAccessStorage() { stopListsWritingThread(); - writeLists(); + + try + { + writeLists(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } @@ -470,19 +474,16 @@ std::optional DiskAccessStorage::readNameImpl(const UUID & id, bool thro std::optional DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - UUID id = generateRandomID(); std::lock_guard lock{mutex}; - if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, notifications)) + if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists)) return id; return std::nullopt; } -bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications) +bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { const String & name = new_entity->getName(); AccessEntityType type = new_entity->getType(); @@ -514,7 +515,7 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne writeAccessEntityToDisk(id, *new_entity); if (name_collision && replace_if_exists) - removeNoLock(it_by_name->second->id, /* throw_if_not_exists = */ false, notifications); + removeNoLock(it_by_name->second->id, /* throw_if_not_exists = */ false); /// Do insertion. 
auto & entry = entries_by_id[id]; @@ -523,22 +524,20 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne entry.name = name; entry.entity = new_entity; entries_by_name[entry.name] = &entry; - prepareNotifications(id, entry, false, notifications); + + changes_notifier.onEntityAdded(id, new_entity); return true; } bool DiskAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - std::lock_guard lock{mutex}; - return removeNoLock(id, throw_if_not_exists, notifications); + return removeNoLock(id, throw_if_not_exists); } -bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications) +bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) @@ -559,25 +558,24 @@ bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, deleteAccessEntityOnDisk(id); /// Do removing. 
- prepareNotifications(id, entry, true, notifications); + UUID removed_id = id; auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; entries_by_name.erase(entry.name); entries_by_id.erase(it); + + changes_notifier.onEntityRemoved(removed_id, type); return true; } bool DiskAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - std::lock_guard lock{mutex}; - return updateNoLock(id, update_func, throw_if_not_exists, notifications); + return updateNoLock(id, update_func, throw_if_not_exists); } -bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications) +bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) @@ -626,7 +624,8 @@ bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_ entries_by_name[entry.name] = &entry; } - prepareNotifications(id, entry, false, notifications); + changes_notifier.onEntityUpdated(id, new_entity); + return true; } @@ -650,74 +649,4 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const throw Exception("Couldn't delete " + file_path, ErrorCodes::FILE_DOESNT_EXIST); } - -void DiskAccessStorage::prepareNotifications(const UUID & id, const Entry & entry, bool remove, Notifications & notifications) const -{ - if (!remove && !entry.entity) - return; - - const AccessEntityPtr entity = remove ? 
nullptr : entry.entity; - for (const auto & handler : entry.handlers_by_id) - notifications.push_back({handler, id, entity}); - - for (const auto & handler : handlers_by_type[static_cast(entry.type)]) - notifications.push_back({handler, id, entity}); -} - - -scope_guard DiskAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const -{ - std::lock_guard lock{mutex}; - auto it = entries_by_id.find(id); - if (it == entries_by_id.end()) - return {}; - const Entry & entry = it->second; - auto handler_it = entry.handlers_by_id.insert(entry.handlers_by_id.end(), handler); - - return [this, id, handler_it] - { - std::lock_guard lock2{mutex}; - auto it2 = entries_by_id.find(id); - if (it2 != entries_by_id.end()) - { - const Entry & entry2 = it2->second; - entry2.handlers_by_id.erase(handler_it); - } - }; -} - -scope_guard DiskAccessStorage::subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const -{ - std::lock_guard lock{mutex}; - auto & handlers = handlers_by_type[static_cast(type)]; - handlers.push_back(handler); - auto handler_it = std::prev(handlers.end()); - - return [this, type, handler_it] - { - std::lock_guard lock2{mutex}; - auto & handlers2 = handlers_by_type[static_cast(type)]; - handlers2.erase(handler_it); - }; -} - -bool DiskAccessStorage::hasSubscription(const UUID & id) const -{ - std::lock_guard lock{mutex}; - auto it = entries_by_id.find(id); - if (it != entries_by_id.end()) - { - const Entry & entry = it->second; - return !entry.handlers_by_id.empty(); - } - return false; -} - -bool DiskAccessStorage::hasSubscription(AccessEntityType type) const -{ - std::lock_guard lock{mutex}; - const auto & handlers = handlers_by_type[static_cast(type)]; - return !handlers.empty(); -} - } diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 20390dabfa0..7784a80e779 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -7,14 +7,15 @@ namespace 
DB { +class AccessChangesNotifier; + /// Loads and saves access entities on a local disk to a specified directory. class DiskAccessStorage : public IAccessStorage { public: static constexpr char STORAGE_TYPE[] = "local directory"; - DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_ = false); - DiskAccessStorage(const String & directory_path_, bool readonly_ = false); + DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_, AccessChangesNotifier & changes_notifier_); ~DiskAccessStorage() override; const char * getStorageType() const override { return STORAGE_TYPE; } @@ -27,8 +28,6 @@ public: bool isReadOnly() const override { return readonly; } bool exists(const UUID & id) const override; - bool hasSubscription(const UUID & id) const override; - bool hasSubscription(AccessEntityType type) const override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; @@ -38,8 +37,6 @@ private: std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; - scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; - scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; void clear(); bool readLists(); @@ -50,9 +47,9 @@ private: void listsWritingThreadFunc(); void stopListsWritingThread(); - bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications); - bool removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications); - bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & 
notifications); + bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists); + bool removeNoLock(const UUID & id, bool throw_if_not_exists); + bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); AccessEntityPtr readAccessEntityFromDisk(const UUID & id) const; void writeAccessEntityToDisk(const UUID & id, const IAccessEntity & entity) const; @@ -65,11 +62,8 @@ private: String name; AccessEntityType type; mutable AccessEntityPtr entity; /// may be nullptr, if the entity hasn't been loaded yet. - mutable std::list handlers_by_id; }; - void prepareNotifications(const UUID & id, const Entry & entry, bool remove, Notifications & notifications) const; - String directory_path; std::atomic readonly; std::unordered_map entries_by_id; @@ -79,7 +73,7 @@ private: ThreadFromGlobalPool lists_writing_thread; /// List files are written in a separate thread. std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit. 
bool lists_writing_thread_is_waiting = false; - mutable std::list handlers_by_type[static_cast(AccessEntityType::MAX)]; + AccessChangesNotifier & changes_notifier; mutable std::mutex mutex; }; } diff --git a/src/Access/EnabledRoles.cpp b/src/Access/EnabledRoles.cpp index 282c52a9544..456529da942 100644 --- a/src/Access/EnabledRoles.cpp +++ b/src/Access/EnabledRoles.cpp @@ -6,7 +6,7 @@ namespace DB { -EnabledRoles::EnabledRoles(const Params & params_) : params(params_) +EnabledRoles::EnabledRoles(const Params & params_) : params(params_), handlers(std::make_shared()) { } @@ -15,42 +15,50 @@ EnabledRoles::~EnabledRoles() = default; std::shared_ptr EnabledRoles::getRolesInfo() const { - std::lock_guard lock{mutex}; + std::lock_guard lock{info_mutex}; return info; } scope_guard EnabledRoles::subscribeForChanges(const OnChangeHandler & handler) const { - std::lock_guard lock{mutex}; - handlers.push_back(handler); - auto it = std::prev(handlers.end()); + std::lock_guard lock{handlers->mutex}; + handlers->list.push_back(handler); + auto it = std::prev(handlers->list.end()); - return [this, it] + return [handlers=handlers, it] { - std::lock_guard lock2{mutex}; - handlers.erase(it); + std::lock_guard lock2{handlers->mutex}; + handlers->list.erase(it); }; } -void EnabledRoles::setRolesInfo(const std::shared_ptr & info_, scope_guard & notifications) +void EnabledRoles::setRolesInfo(const std::shared_ptr & info_, scope_guard * notifications) { - std::lock_guard lock{mutex}; - - if (info && info_ && *info == *info_) - return; - - info = info_; - - std::vector handlers_to_notify; - boost::range::copy(handlers, std::back_inserter(handlers_to_notify)); - - notifications.join(scope_guard([info = info, handlers_to_notify = std::move(handlers_to_notify)] { - for (const auto & handler : handlers_to_notify) - handler(info); - })); + std::lock_guard lock{info_mutex}; + if (info && info_ && *info == *info_) + return; + + info = info_; + } + + if (notifications) + { + std::vector 
handlers_to_notify; + { + std::lock_guard lock{handlers->mutex}; + boost::range::copy(handlers->list, std::back_inserter(handlers_to_notify)); + } + + notifications->join(scope_guard( + [info = info, handlers_to_notify = std::move(handlers_to_notify)] + { + for (const auto & handler : handlers_to_notify) + handler(info); + })); + } } } diff --git a/src/Access/EnabledRoles.h b/src/Access/EnabledRoles.h index 28d1f9ea376..e0d773db343 100644 --- a/src/Access/EnabledRoles.h +++ b/src/Access/EnabledRoles.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -43,12 +44,21 @@ private: friend class RoleCache; explicit EnabledRoles(const Params & params_); - void setRolesInfo(const std::shared_ptr & info_, scope_guard & notifications); + void setRolesInfo(const std::shared_ptr & info_, scope_guard * notifications); const Params params; - mutable std::shared_ptr info; - mutable std::list handlers; - mutable std::mutex mutex; + + std::shared_ptr info; + mutable std::mutex info_mutex; + + struct Handlers + { + std::list list; + std::mutex mutex; + }; + + /// shared_ptr is here for safety because EnabledRoles can be destroyed before all subscriptions are removed. 
+ std::shared_ptr handlers; }; } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8c53216c638..6b04355099d 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -410,34 +410,6 @@ bool IAccessStorage::updateImpl(const UUID & id, const UpdateFunc &, bool throw_ } -scope_guard IAccessStorage::subscribeForChanges(AccessEntityType type, const OnChangedHandler & handler) const -{ - return subscribeForChangesImpl(type, handler); -} - - -scope_guard IAccessStorage::subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const -{ - return subscribeForChangesImpl(id, handler); -} - - -scope_guard IAccessStorage::subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const -{ - scope_guard subscriptions; - for (const auto & id : ids) - subscriptions.join(subscribeForChangesImpl(id, handler)); - return subscriptions; -} - - -void IAccessStorage::notify(const Notifications & notifications) -{ - for (const auto & [fn, id, new_entity] : notifications) - fn(id, new_entity); -} - - UUID IAccessStorage::authenticate( const Credentials & credentials, const Poco::Net::IPAddress & address, diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 428a0e8f052..5de20cad286 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -22,7 +21,7 @@ enum class AuthenticationType; /// Contains entities, i.e. instances of classes derived from IAccessEntity. /// The implementations of this class MUST be thread-safe. -class IAccessStorage +class IAccessStorage : public boost::noncopyable { public: explicit IAccessStorage(const String & storage_name_) : storage_name(storage_name_) {} @@ -41,6 +40,15 @@ public: /// Returns true if this entity is readonly. virtual bool isReadOnly(const UUID &) const { return isReadOnly(); } + /// Reloads and updates entities in this storage. 
This function is used to implement SYSTEM RELOAD CONFIG. + virtual void reload() {} + + /// Starts periodic reloading and update of entities in this storage. + virtual void startPeriodicReloading() {} + + /// Stops periodic reloading and update of entities in this storage. + virtual void stopPeriodicReloading() {} + /// Returns the identifiers of all the entities of a specified type contained in the storage. std::vector findAll(AccessEntityType type) const; @@ -130,23 +138,6 @@ public: /// Updates multiple entities in the storage. Returns the list of successfully updated. std::vector tryUpdate(const std::vector & ids, const UpdateFunc & update_func); - using OnChangedHandler = std::function; - - /// Subscribes for all changes. - /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only). - scope_guard subscribeForChanges(AccessEntityType type, const OnChangedHandler & handler) const; - - template - scope_guard subscribeForChanges(OnChangedHandler handler) const { return subscribeForChanges(EntityClassT::TYPE, handler); } - - /// Subscribes for changes of a specific entry. - /// Can return nullptr if cannot subscribe (identifier not found) or if it doesn't make sense (the storage is read-only). - scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const; - scope_guard subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const; - - virtual bool hasSubscription(AccessEntityType type) const = 0; - virtual bool hasSubscription(const UUID & id) const = 0; - /// Finds a user, check the provided credentials and returns the ID of the user if they are valid. /// Throws an exception if no such user or credentials are invalid. 
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const; @@ -160,8 +151,6 @@ protected: virtual std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); virtual bool removeImpl(const UUID & id, bool throw_if_not_exists); virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); - virtual scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const = 0; - virtual scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const = 0; virtual std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const; virtual bool areCredentialsValid(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; virtual bool isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const; @@ -181,9 +170,6 @@ protected: [[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address); [[noreturn]] static void throwInvalidCredentials(); [[noreturn]] static void throwAuthenticationTypeNotAllowed(AuthenticationType auth_type); - using Notification = std::tuple; - using Notifications = std::vector; - static void notify(const Notifications & notifications); private: const String storage_name; diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 0fe9e6a1605..480d0050e2a 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -27,10 +27,10 @@ namespace ErrorCodes } -LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl * 
access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix) - : IAccessStorage(storage_name_) +LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix) + : IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier()) { - setConfiguration(access_control_, config, prefix); + setConfiguration(config, prefix); } @@ -40,7 +40,7 @@ String LDAPAccessStorage::getLDAPServerName() const } -void LDAPAccessStorage::setConfiguration(AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix) +void LDAPAccessStorage::setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix) { std::scoped_lock lock(mutex); @@ -80,7 +80,6 @@ void LDAPAccessStorage::setConfiguration(AccessControl * access_control_, const } } - access_control = access_control_; ldap_server_name = ldap_server_name_cfg; role_search_params.swap(role_search_params_cfg); common_role_names.swap(common_roles_cfg); @@ -91,7 +90,7 @@ void LDAPAccessStorage::setConfiguration(AccessControl * access_control_, const granted_role_names.clear(); granted_role_ids.clear(); - role_change_subscription = access_control->subscribeForChanges( + role_change_subscription = access_control.subscribeForChanges( [this] (const UUID & id, const AccessEntityPtr & entity) { return this->processRoleChange(id, entity); @@ -215,7 +214,7 @@ void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPClient::SearchR auto it = granted_role_ids.find(role_name); if (it == granted_role_ids.end()) { - if (const auto role_id = access_control->find(role_name)) + if (const auto role_id = access_control.find(role_name)) { granted_role_names.insert_or_assign(*role_id, role_name); it = granted_role_ids.insert_or_assign(role_name, *role_id).first; @@ -450,33 +449,6 @@ 
std::optional LDAPAccessStorage::readNameImpl(const UUID & id, bool thro } -scope_guard LDAPAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const -{ - std::scoped_lock lock(mutex); - return memory_storage.subscribeForChanges(id, handler); -} - - -scope_guard LDAPAccessStorage::subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const -{ - std::scoped_lock lock(mutex); - return memory_storage.subscribeForChanges(type, handler); -} - - -bool LDAPAccessStorage::hasSubscription(const UUID & id) const -{ - std::scoped_lock lock(mutex); - return memory_storage.hasSubscription(id); -} - - -bool LDAPAccessStorage::hasSubscription(AccessEntityType type) const -{ - std::scoped_lock lock(mutex); - return memory_storage.hasSubscription(type); -} - std::optional LDAPAccessStorage::authenticateImpl( const Credentials & credentials, const Poco::Net::IPAddress & address, diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index a86c2fcd35c..df13eff179b 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -32,7 +32,7 @@ class LDAPAccessStorage : public IAccessStorage public: static constexpr char STORAGE_TYPE[] = "ldap"; - explicit LDAPAccessStorage(const String & storage_name_, AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); + explicit LDAPAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); virtual ~LDAPAccessStorage() override = default; String getLDAPServerName() const; @@ -42,19 +42,15 @@ public: virtual String getStorageParamsJSON() const override; virtual bool isReadOnly() const override { return true; } virtual bool exists(const UUID & id) const override; - virtual bool hasSubscription(const UUID & id) const override; - virtual bool hasSubscription(AccessEntityType type) const override; private: 
// IAccessStorage implementations. virtual std::optional findImpl(AccessEntityType type, const String & name) const override; virtual std::vector findAllImpl(AccessEntityType type) const override; virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; virtual std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const override; - virtual scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; - virtual scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; virtual std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override; - void setConfiguration(AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); + void setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix); void processRoleChange(const UUID & id, const AccessEntityPtr & entity); void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); @@ -66,7 +62,7 @@ private: // IAccessStorage implementations. 
const ExternalAuthenticators & external_authenticators, LDAPClient::SearchResultsList & role_search_results) const; mutable std::recursive_mutex mutex; - AccessControl * access_control = nullptr; + AccessControl & access_control; String ldap_server_name; LDAPClient::RoleSearchParamsList role_search_params; std::set common_role_names; // role name that should be granted to all users at all times diff --git a/src/Access/MemoryAccessStorage.cpp b/src/Access/MemoryAccessStorage.cpp index 6aa0688ee3e..9ed80f4a64d 100644 --- a/src/Access/MemoryAccessStorage.cpp +++ b/src/Access/MemoryAccessStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,8 +8,8 @@ namespace DB { -MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_) - : IAccessStorage(storage_name_) +MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_) + : IAccessStorage(storage_name_), changes_notifier(changes_notifier_) { } @@ -63,19 +64,16 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not std::optional MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - UUID id = generateRandomID(); std::lock_guard lock{mutex}; - if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, notifications)) + if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists)) return id; return std::nullopt; } -bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications) +bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { const String & name = new_entity->getName(); AccessEntityType type = new_entity->getType(); @@ -103,7 +101,7 @@ bool 
MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & if (name_collision && replace_if_exists) { const auto & existing_entry = *(it_by_name->second); - removeNoLock(existing_entry.id, /* throw_if_not_exists = */ false, notifications); + removeNoLock(existing_entry.id, /* throw_if_not_exists = */ false); } /// Do insertion. @@ -111,22 +109,19 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & entry.id = id; entry.entity = new_entity; entries_by_name[name] = &entry; - prepareNotifications(entry, false, notifications); + changes_notifier.onEntityAdded(id, new_entity); return true; } bool MemoryAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - std::lock_guard lock{mutex}; - return removeNoLock(id, throw_if_not_exists, notifications); + return removeNoLock(id, throw_if_not_exists); } -bool MemoryAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications) +bool MemoryAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) @@ -141,27 +136,25 @@ bool MemoryAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists const String & name = entry.entity->getName(); AccessEntityType type = entry.entity->getType(); - prepareNotifications(entry, true, notifications); - /// Do removing. 
+ UUID removed_id = id; auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; entries_by_name.erase(name); entries_by_id.erase(it); + + changes_notifier.onEntityRemoved(removed_id, type); return true; } bool MemoryAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - std::lock_guard lock{mutex}; - return updateNoLock(id, update_func, throw_if_not_exists, notifications); + return updateNoLock(id, update_func, throw_if_not_exists); } -bool MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications) +bool MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) @@ -195,7 +188,7 @@ bool MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & updat entries_by_name[new_entity->getName()] = &entry; } - prepareNotifications(entry, false, notifications); + changes_notifier.onEntityUpdated(id, new_entity); return true; } @@ -212,16 +205,8 @@ void MemoryAccessStorage::setAll(const std::vector & all_entiti void MemoryAccessStorage::setAll(const std::vector> & all_entities) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - std::lock_guard lock{mutex}; - setAllNoLock(all_entities, notifications); -} - -void MemoryAccessStorage::setAllNoLock(const std::vector> & all_entities, Notifications & notifications) -{ boost::container::flat_set not_used_ids; std::vector conflicting_ids; @@ -256,7 +241,7 @@ void MemoryAccessStorage::setAllNoLock(const std::vector ids_to_remove = std::move(not_used_ids); boost::range::copy(conflicting_ids, std::inserter(ids_to_remove, ids_to_remove.end())); for (const auto & id : ids_to_remove) - removeNoLock(id, /* throw_if_not_exists = */ false, notifications); + removeNoLock(id, /* 
throw_if_not_exists = */ false); /// Insert or update entities. for (const auto & [id, entity] : all_entities) @@ -269,84 +254,14 @@ void MemoryAccessStorage::setAllNoLock(const std::vector(entry.entity->getType())]) - notifications.push_back({handler, entry.id, entity}); -} - - -scope_guard MemoryAccessStorage::subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const -{ - std::lock_guard lock{mutex}; - auto & handlers = handlers_by_type[static_cast(type)]; - handlers.push_back(handler); - auto handler_it = std::prev(handlers.end()); - - return [this, type, handler_it] - { - std::lock_guard lock2{mutex}; - auto & handlers2 = handlers_by_type[static_cast(type)]; - handlers2.erase(handler_it); - }; -} - - -scope_guard MemoryAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const -{ - std::lock_guard lock{mutex}; - auto it = entries_by_id.find(id); - if (it == entries_by_id.end()) - return {}; - const Entry & entry = it->second; - auto handler_it = entry.handlers_by_id.insert(entry.handlers_by_id.end(), handler); - - return [this, id, handler_it] - { - std::lock_guard lock2{mutex}; - auto it2 = entries_by_id.find(id); - if (it2 != entries_by_id.end()) - { - const Entry & entry2 = it2->second; - entry2.handlers_by_id.erase(handler_it); - } - }; -} - - -bool MemoryAccessStorage::hasSubscription(const UUID & id) const -{ - std::lock_guard lock{mutex}; - auto it = entries_by_id.find(id); - if (it != entries_by_id.end()) - { - const Entry & entry = it->second; - return !entry.handlers_by_id.empty(); - } - return false; -} - - -bool MemoryAccessStorage::hasSubscription(AccessEntityType type) const -{ - std::lock_guard lock{mutex}; - const auto & handlers = handlers_by_type[static_cast(type)]; - return !handlers.empty(); -} } diff --git a/src/Access/MemoryAccessStorage.h b/src/Access/MemoryAccessStorage.h index f497067bd50..690383c6941 100644 --- a/src/Access/MemoryAccessStorage.h +++ 
b/src/Access/MemoryAccessStorage.h @@ -9,13 +9,15 @@ namespace DB { +class AccessChangesNotifier; + /// Implementation of IAccessStorage which keeps all data in memory. class MemoryAccessStorage : public IAccessStorage { public: static constexpr char STORAGE_TYPE[] = "memory"; - explicit MemoryAccessStorage(const String & storage_name_ = STORAGE_TYPE); + explicit MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_); const char * getStorageType() const override { return STORAGE_TYPE; } @@ -24,8 +26,6 @@ public: void setAll(const std::vector> & all_entities); bool exists(const UUID & id) const override; - bool hasSubscription(const UUID & id) const override; - bool hasSubscription(AccessEntityType type) const override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; @@ -34,25 +34,20 @@ private: std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; - scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; - scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; + + bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); + bool removeNoLock(const UUID & id, bool throw_if_not_exists); + bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); struct Entry { UUID id; AccessEntityPtr entity; - mutable std::list handlers_by_id; }; - bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications); - bool removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications); - 
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications); - void setAllNoLock(const std::vector> & all_entities, Notifications & notifications); - void prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const; - - mutable std::recursive_mutex mutex; + mutable std::mutex mutex; std::unordered_map entries_by_id; /// We want to search entries both by ID and by the pair of name and type. std::unordered_map entries_by_name_and_type[static_cast(AccessEntityType::MAX)]; - mutable std::list handlers_by_type[static_cast(AccessEntityType::MAX)]; + AccessChangesNotifier & changes_notifier; }; } diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index d71e46c8523..ce4c9f3fd01 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -45,7 +45,6 @@ void MultipleAccessStorage::setStorages(const std::vector & storages std::unique_lock lock{mutex}; nested_storages = std::make_shared(storages); ids_cache.reset(); - updateSubscriptionsToNestedStorages(lock); } void MultipleAccessStorage::addStorage(const StoragePtr & new_storage) @@ -56,7 +55,6 @@ void MultipleAccessStorage::addStorage(const StoragePtr & new_storage) auto new_storages = std::make_shared(*nested_storages); new_storages->push_back(new_storage); nested_storages = new_storages; - updateSubscriptionsToNestedStorages(lock); } void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove) @@ -70,7 +68,6 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove) new_storages->erase(new_storages->begin() + index); nested_storages = new_storages; ids_cache.reset(); - updateSubscriptionsToNestedStorages(lock); } std::vector MultipleAccessStorage::getStorages() @@ -225,6 +222,28 @@ bool MultipleAccessStorage::isReadOnly(const UUID & id) const } +void MultipleAccessStorage::reload() +{ + auto storages = 
getStoragesInternal(); + for (const auto & storage : *storages) + storage->reload(); +} + +void MultipleAccessStorage::startPeriodicReloading() +{ + auto storages = getStoragesInternal(); + for (const auto & storage : *storages) + storage->startPeriodicReloading(); +} + +void MultipleAccessStorage::stopPeriodicReloading() +{ + auto storages = getStoragesInternal(); + for (const auto & storage : *storages) + storage->stopPeriodicReloading(); +} + + std::optional MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) { std::shared_ptr storage_for_insertion; @@ -310,145 +329,6 @@ bool MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & updat } -scope_guard MultipleAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const -{ - auto storage = findStorage(id); - if (!storage) - return {}; - return storage->subscribeForChanges(id, handler); -} - - -bool MultipleAccessStorage::hasSubscription(const UUID & id) const -{ - auto storages = getStoragesInternal(); - for (const auto & storage : *storages) - { - if (storage->hasSubscription(id)) - return true; - } - return false; -} - - -scope_guard MultipleAccessStorage::subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const -{ - std::unique_lock lock{mutex}; - auto & handlers = handlers_by_type[static_cast(type)]; - handlers.push_back(handler); - auto handler_it = std::prev(handlers.end()); - if (handlers.size() == 1) - updateSubscriptionsToNestedStorages(lock); - - return [this, type, handler_it] - { - std::unique_lock lock2{mutex}; - auto & handlers2 = handlers_by_type[static_cast(type)]; - handlers2.erase(handler_it); - if (handlers2.empty()) - updateSubscriptionsToNestedStorages(lock2); - }; -} - - -bool MultipleAccessStorage::hasSubscription(AccessEntityType type) const -{ - std::lock_guard lock{mutex}; - const auto & handlers = handlers_by_type[static_cast(type)]; - return 
!handlers.empty(); -} - - -/// Updates subscriptions to nested storages. -/// We need the subscriptions to the nested storages if someone has subscribed to us. -/// If any of the nested storages is changed we call our subscribers. -void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock & lock) const -{ - /// lock is already locked. - - std::vector> added_subscriptions[static_cast(AccessEntityType::MAX)]; - std::vector removed_subscriptions; - - for (auto type : collections::range(AccessEntityType::MAX)) - { - auto & handlers = handlers_by_type[static_cast(type)]; - auto & subscriptions = subscriptions_to_nested_storages[static_cast(type)]; - if (handlers.empty()) - { - /// None has subscribed to us, we need no subscriptions to the nested storages. - for (auto & subscription : subscriptions | boost::adaptors::map_values) - removed_subscriptions.push_back(std::move(subscription)); - subscriptions.clear(); - } - else - { - /// Someone has subscribed to us, now we need to have a subscription to each nested storage. - for (auto it = subscriptions.begin(); it != subscriptions.end();) - { - const auto & storage = it->first; - auto & subscription = it->second; - if (boost::range::find(*nested_storages, storage) == nested_storages->end()) - { - removed_subscriptions.push_back(std::move(subscription)); - it = subscriptions.erase(it); - } - else - ++it; - } - - for (const auto & storage : *nested_storages) - { - if (!subscriptions.contains(storage)) - added_subscriptions[static_cast(type)].push_back({storage, nullptr}); - } - } - } - - /// Unlock the mutex temporarily because it's much better to subscribe to the nested storages - /// with the mutex unlocked. 
- lock.unlock(); - removed_subscriptions.clear(); - - for (auto type : collections::range(AccessEntityType::MAX)) - { - if (!added_subscriptions[static_cast(type)].empty()) - { - auto on_changed = [this, type](const UUID & id, const AccessEntityPtr & entity) - { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); - std::lock_guard lock2{mutex}; - for (const auto & handler : handlers_by_type[static_cast(type)]) - notifications.push_back({handler, id, entity}); - }; - for (auto & [storage, subscription] : added_subscriptions[static_cast(type)]) - subscription = storage->subscribeForChanges(type, on_changed); - } - } - - /// Lock the mutex again to store added subscriptions to the nested storages. - lock.lock(); - - for (auto type : collections::range(AccessEntityType::MAX)) - { - if (!added_subscriptions[static_cast(type)].empty()) - { - auto & subscriptions = subscriptions_to_nested_storages[static_cast(type)]; - for (auto & [storage, subscription] : added_subscriptions[static_cast(type)]) - { - if (!subscriptions.contains(storage) && (boost::range::find(*nested_storages, storage) != nested_storages->end()) - && !handlers_by_type[static_cast(type)].empty()) - { - subscriptions.emplace(std::move(storage), std::move(subscription)); - } - } - } - } - - lock.unlock(); -} - - std::optional MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 3a47163af6f..61a975050b6 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -24,6 +24,10 @@ public: bool isReadOnly() const override; bool isReadOnly(const UUID & id) const override; + void reload() override; + void startPeriodicReloading() override; + void stopPeriodicReloading() override; + void setStorages(const std::vector & storages); void addStorage(const StoragePtr 
& new_storage); void removeStorage(const StoragePtr & storage_to_remove); @@ -37,8 +41,6 @@ public: StoragePtr getStorage(const UUID & id); bool exists(const UUID & id) const override; - bool hasSubscription(const UUID & id) const override; - bool hasSubscription(AccessEntityType type) const override; protected: std::optional findImpl(AccessEntityType type, const String & name) const override; @@ -48,19 +50,14 @@ protected: std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; - scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; - scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override; private: using Storages = std::vector; std::shared_ptr getStoragesInternal() const; - void updateSubscriptionsToNestedStorages(std::unique_lock & lock) const; std::shared_ptr nested_storages; mutable LRUCache ids_cache; - mutable std::list handlers_by_type[static_cast(AccessEntityType::MAX)]; - mutable std::unordered_map subscriptions_to_nested_storages[static_cast(AccessEntityType::MAX)]; mutable std::mutex mutex; }; diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index e56fad720be..d3d1ee3fb6b 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -1,12 +1,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include @@ -30,11 +32,13 @@ static UUID 
parseUUID(const String & text) ReplicatedAccessStorage::ReplicatedAccessStorage( const String & storage_name_, const String & zookeeper_path_, - zkutil::GetZooKeeper get_zookeeper_) + zkutil::GetZooKeeper get_zookeeper_, + AccessChangesNotifier & changes_notifier_) : IAccessStorage(storage_name_) , zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) - , refresh_queue(std::numeric_limits::max()) + , watched_queue(std::make_shared>(std::numeric_limits::max())) + , changes_notifier(changes_notifier_) { if (zookeeper_path.empty()) throw Exception("ZooKeeper path must be non-empty", ErrorCodes::BAD_ARGUMENTS); @@ -45,29 +49,30 @@ ReplicatedAccessStorage::ReplicatedAccessStorage( /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it. if (zookeeper_path.front() != '/') zookeeper_path = "/" + zookeeper_path; + + initializeZookeeper(); } ReplicatedAccessStorage::~ReplicatedAccessStorage() { - ReplicatedAccessStorage::shutdown(); + stopWatchingThread(); } - -void ReplicatedAccessStorage::startup() +void ReplicatedAccessStorage::startWatchingThread() { - initializeZookeeper(); - worker_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWorkerThread, this); + bool prev_watching_flag = watching.exchange(true); + if (!prev_watching_flag) + watching_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWatchingThread, this); } -void ReplicatedAccessStorage::shutdown() +void ReplicatedAccessStorage::stopWatchingThread() { - bool prev_stop_flag = stop_flag.exchange(true); - if (!prev_stop_flag) + bool prev_watching_flag = watching.exchange(false); + if (prev_watching_flag) { - refresh_queue.finish(); - - if (worker_thread.joinable()) - worker_thread.join(); + watched_queue->finish(); + if (watching_thread.joinable()) + watching_thread.join(); } } @@ -105,10 +110,8 @@ std::optional ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & if (!ok) return std::nullopt; - Notifications notifications; - 
SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - refreshEntityNoLock(zookeeper, id, notifications); + refreshEntityNoLock(zookeeper, id); return id; } @@ -207,10 +210,8 @@ bool ReplicatedAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exis if (!ok) return false; - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - removeEntityNoLock(id, notifications); + removeEntityNoLock(id); return true; } @@ -261,10 +262,8 @@ bool ReplicatedAccessStorage::updateImpl(const UUID & id, const UpdateFunc & upd if (!ok) return false; - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - refreshEntityNoLock(zookeeper, id, notifications); + refreshEntityNoLock(zookeeper, id); return true; } @@ -328,16 +327,18 @@ bool ReplicatedAccessStorage::updateZooKeeper(const zkutil::ZooKeeperPtr & zooke } -void ReplicatedAccessStorage::runWorkerThread() +void ReplicatedAccessStorage::runWatchingThread() { - LOG_DEBUG(getLogger(), "Started worker thread"); - while (!stop_flag) + LOG_DEBUG(getLogger(), "Started watching thread"); + setThreadName("ReplACLWatch"); + while (watching) { try { if (!initialized) initializeZookeeper(); - refresh(); + if (refresh()) + changes_notifier.sendNotifications(); } catch (...) 
{ @@ -353,7 +354,7 @@ void ReplicatedAccessStorage::resetAfterError() initialized = false; UUID id; - while (refresh_queue.tryPop(id)) {} + while (watched_queue->tryPop(id)) {} std::lock_guard lock{mutex}; for (const auto type : collections::range(AccessEntityType::MAX)) @@ -389,21 +390,20 @@ void ReplicatedAccessStorage::createRootNodes(const zkutil::ZooKeeperPtr & zooke } } -void ReplicatedAccessStorage::refresh() +bool ReplicatedAccessStorage::refresh() { UUID id; - if (refresh_queue.tryPop(id, /* timeout_ms: */ 10000)) - { - if (stop_flag) - return; + if (!watched_queue->tryPop(id, /* timeout_ms: */ 10000)) + return false; - auto zookeeper = get_zookeeper(); + auto zookeeper = get_zookeeper(); - if (id == UUIDHelpers::Nil) - refreshEntities(zookeeper); - else - refreshEntity(zookeeper, id); - } + if (id == UUIDHelpers::Nil) + refreshEntities(zookeeper); + else + refreshEntity(zookeeper, id); + + return true; } @@ -412,9 +412,9 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke LOG_DEBUG(getLogger(), "Refreshing entities list"); const String zookeeper_uuids_path = zookeeper_path + "/uuid"; - auto watch_entities_list = [this](const Coordination::WatchResponse &) + auto watch_entities_list = [watched_queue = watched_queue](const Coordination::WatchResponse &) { - [[maybe_unused]] bool push_result = refresh_queue.push(UUIDHelpers::Nil); + [[maybe_unused]] bool push_result = watched_queue->push(UUIDHelpers::Nil); }; Coordination::Stat stat; const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list); @@ -424,8 +424,6 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke for (const String & entity_uuid_str : entity_uuid_strs) entity_uuids.insert(parseUUID(entity_uuid_str)); - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; std::vector entities_to_remove; @@ -437,14 +435,14 @@ void 
ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke entities_to_remove.push_back(entity_uuid); } for (const auto & entity_uuid : entities_to_remove) - removeEntityNoLock(entity_uuid, notifications); + removeEntityNoLock(entity_uuid); /// Locally add entities that were added to ZooKeeper for (const auto & entity_uuid : entity_uuids) { const auto it = entries_by_id.find(entity_uuid); if (it == entries_by_id.end()) - refreshEntityNoLock(zookeeper, entity_uuid, notifications); + refreshEntityNoLock(zookeeper, entity_uuid); } LOG_DEBUG(getLogger(), "Refreshing entities list finished"); @@ -452,21 +450,18 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke void ReplicatedAccessStorage::refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) { - Notifications notifications; - SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - - refreshEntityNoLock(zookeeper, id, notifications); + refreshEntityNoLock(zookeeper, id); } -void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, Notifications & notifications) +void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) { LOG_DEBUG(getLogger(), "Refreshing entity {}", toString(id)); - const auto watch_entity = [this, id](const Coordination::WatchResponse & response) + const auto watch_entity = [watched_queue = watched_queue, id](const Coordination::WatchResponse & response) { if (response.type == Coordination::Event::CHANGED) - [[maybe_unused]] bool push_result = refresh_queue.push(id); + [[maybe_unused]] bool push_result = watched_queue->push(id); }; Coordination::Stat entity_stat; const String entity_path = zookeeper_path + "/uuid/" + toString(id); @@ -475,16 +470,16 @@ void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & z if (exists) { const AccessEntityPtr entity = deserializeAccessEntity(entity_definition, 
entity_path); - setEntityNoLock(id, entity, notifications); + setEntityNoLock(id, entity); } else { - removeEntityNoLock(id, notifications); + removeEntityNoLock(id); } } -void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntityPtr & entity, Notifications & notifications) +void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntityPtr & entity) { LOG_DEBUG(getLogger(), "Setting id {} to entity named {}", toString(id), entity->getName()); const AccessEntityType type = entity->getType(); @@ -494,12 +489,14 @@ void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntit auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; if (auto it = entries_by_name.find(name); it != entries_by_name.end() && it->second->id != id) { - removeEntityNoLock(it->second->id, notifications); + removeEntityNoLock(it->second->id); } /// If the entity already exists under a different type+name, remove old type+name + bool existed_before = false; if (auto it = entries_by_id.find(id); it != entries_by_id.end()) { + existed_before = true; const AccessEntityPtr & existing_entity = it->second.entity; const AccessEntityType existing_type = existing_entity->getType(); const String & existing_name = existing_entity->getName(); @@ -514,11 +511,18 @@ void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntit entry.id = id; entry.entity = entity; entries_by_name[name] = &entry; - prepareNotifications(entry, false, notifications); + + if (initialized) + { + if (existed_before) + changes_notifier.onEntityUpdated(id, entity); + else + changes_notifier.onEntityAdded(id, entity); + } } -void ReplicatedAccessStorage::removeEntityNoLock(const UUID & id, Notifications & notifications) +void ReplicatedAccessStorage::removeEntityNoLock(const UUID & id) { LOG_DEBUG(getLogger(), "Removing entity with id {}", toString(id)); const auto it = entries_by_id.find(id); @@ -531,7 +535,6 @@ void 
ReplicatedAccessStorage::removeEntityNoLock(const UUID & id, Notifications const Entry & entry = it->second; const AccessEntityType type = entry.entity->getType(); const String & name = entry.entity->getName(); - prepareNotifications(entry, true, notifications); auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; const auto name_it = entries_by_name.find(name); @@ -542,8 +545,11 @@ void ReplicatedAccessStorage::removeEntityNoLock(const UUID & id, Notifications else entries_by_name.erase(name); + UUID removed_id = id; entries_by_id.erase(id); LOG_DEBUG(getLogger(), "Removed entity with id {}", toString(id)); + + changes_notifier.onEntityRemoved(removed_id, type); } @@ -594,73 +600,4 @@ AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if return entry.entity; } - -void ReplicatedAccessStorage::prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const -{ - const AccessEntityPtr entity = remove ? nullptr : entry.entity; - for (const auto & handler : entry.handlers_by_id) - notifications.push_back({handler, entry.id, entity}); - - for (const auto & handler : handlers_by_type[static_cast(entry.entity->getType())]) - notifications.push_back({handler, entry.id, entity}); -} - - -scope_guard ReplicatedAccessStorage::subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const -{ - std::lock_guard lock{mutex}; - auto & handlers = handlers_by_type[static_cast(type)]; - handlers.push_back(handler); - auto handler_it = std::prev(handlers.end()); - - return [this, type, handler_it] - { - std::lock_guard lock2{mutex}; - auto & handlers2 = handlers_by_type[static_cast(type)]; - handlers2.erase(handler_it); - }; -} - - -scope_guard ReplicatedAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const -{ - std::lock_guard lock{mutex}; - const auto it = entries_by_id.find(id); - if (it == entries_by_id.end()) - return {}; - const Entry & entry 
= it->second; - auto handler_it = entry.handlers_by_id.insert(entry.handlers_by_id.end(), handler); - - return [this, id, handler_it] - { - std::lock_guard lock2{mutex}; - auto it2 = entries_by_id.find(id); - if (it2 != entries_by_id.end()) - { - const Entry & entry2 = it2->second; - entry2.handlers_by_id.erase(handler_it); - } - }; -} - - -bool ReplicatedAccessStorage::hasSubscription(const UUID & id) const -{ - std::lock_guard lock{mutex}; - const auto & it = entries_by_id.find(id); - if (it != entries_by_id.end()) - { - const Entry & entry = it->second; - return !entry.handlers_by_id.empty(); - } - return false; -} - - -bool ReplicatedAccessStorage::hasSubscription(AccessEntityType type) const -{ - std::lock_guard lock{mutex}; - const auto & handlers = handlers_by_type[static_cast(type)]; - return !handlers.empty(); -} } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index 8fdd24b6d54..f9f579e2ba7 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -18,32 +18,33 @@ namespace DB { +class AccessChangesNotifier; + /// Implementation of IAccessStorage which keeps all data in zookeeper. 
class ReplicatedAccessStorage : public IAccessStorage { public: static constexpr char STORAGE_TYPE[] = "replicated"; - ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper); + ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_); virtual ~ReplicatedAccessStorage() override; const char * getStorageType() const override { return STORAGE_TYPE; } - virtual void startup(); - virtual void shutdown(); + void startPeriodicReloading() override { startWatchingThread(); } + void stopPeriodicReloading() override { stopWatchingThread(); } bool exists(const UUID & id) const override; - bool hasSubscription(const UUID & id) const override; - bool hasSubscription(AccessEntityType type) const override; private: String zookeeper_path; zkutil::GetZooKeeper get_zookeeper; std::atomic initialized = false; - std::atomic stop_flag = false; - ThreadFromGlobalPool worker_thread; - ConcurrentBoundedQueue refresh_queue; + + std::atomic watching = false; + ThreadFromGlobalPool watching_thread; + std::shared_ptr> watched_queue; std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override; @@ -53,37 +54,36 @@ private: bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists); bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); - void runWorkerThread(); - void resetAfterError(); void initializeZookeeper(); void createRootNodes(const zkutil::ZooKeeperPtr & zookeeper); - void refresh(); + void startWatchingThread(); + void stopWatchingThread(); + + void runWatchingThread(); + void resetAfterError(); + + bool refresh(); void refreshEntities(const zkutil::ZooKeeperPtr & 
zookeeper); void refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id); - void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, Notifications & notifications); + void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id); - void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity, Notifications & notifications); - void removeEntityNoLock(const UUID & id, Notifications & notifications); + void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity); + void removeEntityNoLock(const UUID & id); struct Entry { UUID id; AccessEntityPtr entity; - mutable std::list handlers_by_id; }; std::optional findImpl(AccessEntityType type, const String & name) const override; std::vector findAllImpl(AccessEntityType type) const override; AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; - void prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const; - scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; - scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - mutable std::mutex mutex; std::unordered_map entries_by_id; std::unordered_map entries_by_name_and_type[static_cast(AccessEntityType::MAX)]; - mutable std::list handlers_by_type[static_cast(AccessEntityType::MAX)]; + AccessChangesNotifier & changes_notifier; }; } diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index f0e1435e299..308b771243e 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -66,9 +66,6 @@ RoleCache::~RoleCache() = default; std::shared_ptr RoleCache::getEnabledRoles(const std::vector & roles, const std::vector & roles_with_admin_option) { - /// Declared before `lock` to send notifications after the mutex will be unlocked. 
- scope_guard notifications; - std::lock_guard lock{mutex}; EnabledRoles::Params params; params.current_roles.insert(roles.begin(), roles.end()); @@ -83,13 +80,13 @@ RoleCache::getEnabledRoles(const std::vector & roles, const std::vector(new EnabledRoles(params)); - collectEnabledRoles(*res, notifications); + collectEnabledRoles(*res, nullptr); enabled_roles.emplace(std::move(params), res); return res; } -void RoleCache::collectEnabledRoles(scope_guard & notifications) +void RoleCache::collectEnabledRoles(scope_guard * notifications) { /// `mutex` is already locked. @@ -107,7 +104,7 @@ void RoleCache::collectEnabledRoles(scope_guard & notifications) } -void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard & notifications) +void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifications) { /// `mutex` is already locked. @@ -170,7 +167,7 @@ void RoleCache::roleChanged(const UUID & role_id, const RolePtr & changed_role) return; role_from_cache->first = changed_role; cache.update(role_id, role_from_cache); - collectEnabledRoles(notifications); + collectEnabledRoles(&notifications); } @@ -181,7 +178,7 @@ void RoleCache::roleRemoved(const UUID & role_id) std::lock_guard lock{mutex}; cache.remove(role_id); - collectEnabledRoles(notifications); + collectEnabledRoles(&notifications); } } diff --git a/src/Access/RoleCache.h b/src/Access/RoleCache.h index e9c731f1342..45746f2e3c3 100644 --- a/src/Access/RoleCache.h +++ b/src/Access/RoleCache.h @@ -24,8 +24,8 @@ public: const std::vector & current_roles_with_admin_option); private: - void collectEnabledRoles(scope_guard & notifications); - void collectEnabledRoles(EnabledRoles & enabled, scope_guard & notifications); + void collectEnabledRoles(scope_guard * notifications); + void collectEnabledRoles(EnabledRoles & enabled, scope_guard * notifications); RolePtr getRole(const UUID & role_id); void roleChanged(const UUID & role_id, const RolePtr & changed_role); void roleRemoved(const UUID & 
role_id); diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 712e5393ce7..a6c4388fef8 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -14,9 +15,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -525,8 +523,8 @@ namespace } } -UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_) - : IAccessStorage(storage_name_), access_control(access_control_) +UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_) + : IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier()) { } @@ -605,9 +603,9 @@ void UsersConfigAccessStorage::load( std::make_shared(), [&](Poco::AutoPtr new_config, bool /*initial_loading*/) { - parseFromConfig(*new_config); - Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path); + parseFromConfig(*new_config); + access_control.getChangesNotifier().sendNotifications(); }, /* already_loaded = */ false); } @@ -662,27 +660,4 @@ std::optional UsersConfigAccessStorage::readNameImpl(const UUID & id, bo return memory_storage.readName(id, throw_if_not_exists); } - -scope_guard UsersConfigAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const -{ - return memory_storage.subscribeForChanges(id, handler); -} - - -scope_guard UsersConfigAccessStorage::subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const -{ - return memory_storage.subscribeForChanges(type, handler); -} - - -bool UsersConfigAccessStorage::hasSubscription(const UUID & id) const -{ - return memory_storage.hasSubscription(id); -} - - -bool UsersConfigAccessStorage::hasSubscription(AccessEntityType type) 
const -{ - return memory_storage.hasSubscription(type); -} } diff --git a/src/Access/UsersConfigAccessStorage.h b/src/Access/UsersConfigAccessStorage.h index e21eb17f462..5c99bf30160 100644 --- a/src/Access/UsersConfigAccessStorage.h +++ b/src/Access/UsersConfigAccessStorage.h @@ -22,7 +22,7 @@ public: static constexpr char STORAGE_TYPE[] = "users.xml"; - UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_); + UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_); ~UsersConfigAccessStorage() override; const char * getStorageType() const override { return STORAGE_TYPE; } @@ -37,13 +37,12 @@ public: const String & include_from_path = {}, const String & preprocessed_dir = {}, const zkutil::GetZooKeeper & get_zookeeper_function = {}); - void reload(); - void startPeriodicReloading(); - void stopPeriodicReloading(); + + void reload() override; + void startPeriodicReloading() override; + void stopPeriodicReloading() override; bool exists(const UUID & id) const override; - bool hasSubscription(const UUID & id) const override; - bool hasSubscription(AccessEntityType type) const override; private: void parseFromConfig(const Poco::Util::AbstractConfiguration & config); @@ -51,10 +50,8 @@ private: std::vector findAllImpl(AccessEntityType type) const override; AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const override; - scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; - scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - const AccessControl & access_control; + AccessControl & access_control; MemoryAccessStorage memory_storage; String path; std::unique_ptr config_reloader; diff --git a/src/Access/tests/gtest_replicated_access_storage.cpp 
b/src/Access/tests/gtest_replicated_access_storage.cpp index f2052e91749..c780e598b64 100644 --- a/src/Access/tests/gtest_replicated_access_storage.cpp +++ b/src/Access/tests/gtest_replicated_access_storage.cpp @@ -1,5 +1,6 @@ #include #include +#include using namespace DB; @@ -12,18 +13,6 @@ namespace ErrorCodes } -TEST(ReplicatedAccessStorage, ShutdownWithoutStartup) -{ - auto get_zk = []() - { - return std::shared_ptr(); - }; - - auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk); - storage.shutdown(); -} - - TEST(ReplicatedAccessStorage, ShutdownWithFailedStartup) { auto get_zk = []() @@ -31,16 +20,16 @@ TEST(ReplicatedAccessStorage, ShutdownWithFailedStartup) return std::shared_ptr(); }; - auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk); + AccessChangesNotifier changes_notifier; + try { - storage.startup(); + auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk, changes_notifier); } catch (Exception & e) { if (e.code() != ErrorCodes::NO_ZOOKEEPER) throw; } - storage.shutdown(); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 34f396b978c..29491dfa5ea 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -342,8 +342,6 @@ struct ContextSharedPart /// Stop periodic reloading of the configuration files. /// This must be done first because otherwise the reloading may pass a changed config /// to some destroyed parts of ContextSharedPart. - if (access_control) - access_control->stopPeriodicReloadingUsersConfigs(); if (external_dictionaries_loader) external_dictionaries_loader->enablePeriodicUpdates(false); if (external_user_defined_executable_functions_loader) From 9ec3b35cf240af9a5b55f2d31cdfdd9e24e292ca Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 19 May 2022 09:24:28 +0200 Subject: [PATCH 356/615] Use AccessExpireCache instead of ExpireCache. 
--- src/Access/AccessControl.cpp | 4 ++-- src/Access/RoleCache.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c602e01623c..5cf283ba803 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -83,7 +83,7 @@ public: private: const AccessControl & access_control; - Poco::ExpireCache> cache; + Poco::AccessExpireCache> cache; std::mutex mutex; }; diff --git a/src/Access/RoleCache.h b/src/Access/RoleCache.h index 45746f2e3c3..51c415d4d1d 100644 --- a/src/Access/RoleCache.h +++ b/src/Access/RoleCache.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -31,7 +31,7 @@ private: void roleRemoved(const UUID & role_id); const AccessControl & access_control; - Poco::ExpireCache> cache; + Poco::AccessExpireCache> cache; std::map> enabled_roles; mutable std::mutex mutex; }; From 0d69f35b6a19637264e2b11109d62643fddc8b3d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 21 May 2022 14:54:45 +0200 Subject: [PATCH 357/615] Fixed style check --- src/Functions/FunctionsComparison.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 2911fb5d004..16575e551a7 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -143,7 +143,8 @@ struct NumComparisonImpl } })) - static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray & c) { + static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray & c) + { #if USE_MULTITARGET_CODE if (isArchSupported(TargetArch::AVX2)) { From 3ff82569d0a21d06daf2b701ad1bcddfc098bf12 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 21 May 2022 12:30:57 -0300 Subject: [PATCH 358/615] Update tables.md --- docs/en/operations/system-tables/tables.md 
| 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 8286d51aed6..13c2616ff54 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -12,11 +12,13 @@ Columns: - `name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). + - `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. -- `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system. +- `data_paths` (Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. - `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. @@ -60,6 +62,14 @@ Columns: - `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source. +- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object). + +- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object). + +- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database. 
+ +- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table. + The `system.tables` table is used in `SHOW TABLES` query implementation. **Example** @@ -95,6 +105,10 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] Row 2: ────── @@ -122,4 +136,8 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] ``` From cb68b20c8a71452de0f731dbeb0b7d9f62ae2aec Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 21 May 2022 12:41:50 -0300 Subject: [PATCH 359/615] Update tables.md --- docs/ru/operations/system-tables/tables.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/ru/operations/system-tables/tables.md b/docs/ru/operations/system-tables/tables.md index bf47051442e..ceb9ebc79bf 100644 --- a/docs/ru/operations/system-tables/tables.md +++ b/docs/ru/operations/system-tables/tables.md @@ -12,11 +12,13 @@ - `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Uuid таблицы (Atomic database). + - `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров). - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет. -- `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе. +- `data_paths` (Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — пути к данным таблицы в файловых системах. 
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе. @@ -60,6 +62,14 @@ - `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий хранит ли таблица сама какие-то данные на диске или только обращается к какому-то другому источнику. +- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - базы данных необходимые для загрузки объекта. + +- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - таблицы необходимые для загрузки объекта. + +- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - базы данных, которым объект необходим для загрузки. + +- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - таблицы, которым объект необходим для загрузки. + Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`. 
**Пример** @@ -95,6 +105,10 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] Row 2: ────── @@ -122,4 +136,8 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] ``` From 2f8edc8e7ff00e25396b1e591d75973c10d16cad Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 21 May 2022 12:44:19 -0300 Subject: [PATCH 360/615] Update tables.md --- docs/ru/operations/system-tables/tables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/system-tables/tables.md b/docs/ru/operations/system-tables/tables.md index ceb9ebc79bf..ae5ca586a88 100644 --- a/docs/ru/operations/system-tables/tables.md +++ b/docs/ru/operations/system-tables/tables.md @@ -18,7 +18,7 @@ - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет. -- `data_paths` (Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — пути к данным таблицы в файловых системах. +- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — пути к данным таблицы в файловых системах. - `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе. 
From 4e327564f5a13e1d6f6562e178e69a8ca0a31ca0 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 21 May 2022 12:45:09 -0300 Subject: [PATCH 361/615] Update tables.md --- docs/en/operations/system-tables/tables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 13c2616ff54..6cf1490f14e 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -18,7 +18,7 @@ Columns: - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. -- `data_paths` (Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. +- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. - `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. 
From 942af133e5a682af3249b501c142b4b38bfe92cb Mon Sep 17 00:00:00 2001 From: Memo Date: Sat, 21 May 2022 23:54:12 +0800 Subject: [PATCH 362/615] init --- src/Functions/generateUUIDv4.cpp | 10 +- ...generate_multi_columns_with_uuid.reference | 100 ++++++++++++++++++ ...02310_generate_multi_columns_with_uuid.sql | 19 ++++ 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference create mode 100644 tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index 659c4c2c7c6..e8309ea7234 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -23,10 +23,18 @@ public: size_t getNumberOfArguments() const override { return 0; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes &) const override + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { + if (arguments.size() > 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 0 or 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + return std::make_shared(); } diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference new file mode 100644 index 00000000000..c6f93358b29 --- /dev/null +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference @@ -0,0 +1,100 @@ +e2bb163f-34e6-4e9d-b034-ce007be68373 +8d29bbbd-1e08-4680-b5b0-cd246a93f609 
+59daf935-7369-4e9c-8356-581b7f6fb533 +70ba5575-349c-4554-9db1-d8754c82d132 +b74ea91c-e15a-43d7-b459-f7dc5a981f7d +67f79647-9111-43c3-813c-60a040244e7f +d39ceea1-293c-4bf8-9efb-10ddef918ee4 +29b285bd-5a1c-49d7-b21a-ff3ec72b96fe +f90059a1-33fc-4b5b-95ee-68ed44875c2a +394164c6-7a5d-4c2f-be50-e34a8cd9e619 +b76f8ccb-9ec1-49eb-912f-c432cb4e0433 +fd211b9a-6aaa-4ad1-b720-9f91db6c89e7 +264ad5d0-5bd5-456d-a3d5-8ae9bccf532d +29063f15-c68e-4c34-8206-613a64944d62 +21557fee-5cc9-4b69-9d87-c66b93e78ba9 +5ae7c2d4-191d-41fc-8b9a-3c7365385536 +62829100-2e50-43b6-af28-e4b69a9c7046 +620eaf2a-22ba-4f63-8cea-a2abcf9d72ce +41663a3d-8006-4aea-9000-e8d56b4352fb +5e3322e8-02ee-4742-b344-83cc6d661525 +4ab46734-b75e-4cd6-b65c-f22e2cd91b48 +9099183c-d557-43d7-8ed6-657baee076cc +9c140710-1b03-4485-a746-b66003fb0842 +6aaf2122-4bdb-4179-852e-bc256235ca45 +9354a012-2b3c-486e-910f-0ec7f3415030 +87700127-b7db-4d95-bfd8-ce6f5938a2b7 +45b4f61d-f24d-43ab-822a-6b1926d6bbfc +cd3db3d5-fb0d-4de1-9cc3-d5a910c5f1d0 +96af3bc4-74f5-428d-8b1f-dca55b0c87cf +6ba5e92c-f48d-4bb5-a0c7-e14bcf679d78 +7580efeb-1717-4c0c-af1f-d11eb9f92d92 7580efeb-1717-4c0c-af1f-d11eb9f92d92 +2ec4be1a-bf75-4bf3-bc5f-56a55e90e569 2ec4be1a-bf75-4bf3-bc5f-56a55e90e569 +430c2fe5-0a31-4710-b39f-b6c221503bd1 430c2fe5-0a31-4710-b39f-b6c221503bd1 +94cc855c-54d7-4e03-bc57-fa90c1f23277 94cc855c-54d7-4e03-bc57-fa90c1f23277 +fe5c31f5-d249-4d3e-8b88-d4aa61fc3e21 fe5c31f5-d249-4d3e-8b88-d4aa61fc3e21 +7ddcc84d-3cfd-4793-8afc-b07f6ff06291 7ddcc84d-3cfd-4793-8afc-b07f6ff06291 +1d9fa2d5-4b6c-4219-8300-768e19978ca3 1d9fa2d5-4b6c-4219-8300-768e19978ca3 +213ec099-6a61-4e46-a90b-b31417393a6a 213ec099-6a61-4e46-a90b-b31417393a6a +d01dda38-ba5e-4b89-ad5f-96cd67040530 d01dda38-ba5e-4b89-ad5f-96cd67040530 +3697424a-a4e2-465b-8c0b-39255d0b0e73 3697424a-a4e2-465b-8c0b-39255d0b0e73 +ab301df3-1365-4b00-b4f6-24e6bb7026d6 ab301df3-1365-4b00-b4f6-24e6bb7026d6 +d841d809-ac28-41ac-8333-a7cb94d3baf0 d841d809-ac28-41ac-8333-a7cb94d3baf0 
+32d02bfb-de35-4d87-8b5d-f39c1958a62a 32d02bfb-de35-4d87-8b5d-f39c1958a62a +6b8a8bf1-1a74-494b-842c-2398763620d3 6b8a8bf1-1a74-494b-842c-2398763620d3 +aecd5210-a235-4804-995a-1a1d30e574fe aecd5210-a235-4804-995a-1a1d30e574fe +cfb2e0d0-0c2f-4211-9c4c-704a59f3655b cfb2e0d0-0c2f-4211-9c4c-704a59f3655b +e124f4ec-58df-4bdd-b5d0-67a36f72f018 e124f4ec-58df-4bdd-b5d0-67a36f72f018 +defda77b-dcf1-4e6f-b3a9-53668562c852 defda77b-dcf1-4e6f-b3a9-53668562c852 +0fe4d450-7abb-4972-b66d-064822cfdfc0 0fe4d450-7abb-4972-b66d-064822cfdfc0 +7d9f4ec9-bcb6-4f5a-b602-3de1dd2d8f66 7d9f4ec9-bcb6-4f5a-b602-3de1dd2d8f66 +084451c1-8e55-49db-b6ef-caa8b4a9cb4a 13118c86-100a-4b5d-afd2-75b0f8fc4c99 +a53c9325-d821-44e3-9dee-2c551a79cedc 44ccc4a5-6873-4f07-9ce6-be86e33543c3 +de4ce7bb-753a-4a84-92b3-2e65a84deefd e6fe9df5-b4b4-40a4-bf83-9b745a28d6d0 +5c493fda-5aff-4d49-bccf-93c9b90b17de c6872ee1-9131-43bb-8d51-a29cf8a8cb6f +e98fd48e-36f5-4fe6-b20a-012cfdf8ec8d 61c4f2b3-7d7e-427d-8dac-1f488916bc9a +121f5fb5-afc4-4c25-8f13-b3c5d6ef3ea8 b2e12d1a-6872-45a0-9582-9a2761d937c6 +7cebfef1-aa83-4ff5-aa11-3f9521a72899 f7e6db87-28bc-4d76-a6ea-7981fbd7aebd +6637d6b5-ac02-4ace-8e0e-0ed378722926 28891a8d-c9da-4030-ade2-5a9973739bb0 +1b87e644-cbcd-449b-9fbe-5808cde60310 4d74060c-2535-4901-ac15-8b91c17ac609 +b9f8fa5e-a8bb-4b88-91d5-c3e5f45b8b88 db61ac3c-c259-40d2-aa02-477e12b8f2d2 +506a9df5-deed-4c29-8458-ace61436593d 72d234c4-765d-4742-bc22-edca3045d30f +0b5e27dc-4cb7-411f-823c-9ffa49b811f7 f874258c-4864-4db8-8c05-ce1f34c2b398 +ce1ad114-8e62-46e1-a3e3-0497db659211 1b274e59-12eb-456c-bdfa-cb8794bee7f6 +c4b2111b-63e6-4cdf-8b46-7962665e2221 5f027848-b3bb-438d-8e04-e167d5fb6e1d +73e0b973-b912-4641-ba24-c43cc0d1c123 a48badd5-4289-4de0-8a20-eb6b8b027787 +00f13861-4e0e-4745-8adf-2eda884af332 1d50b1ee-89fe-4988-9554-060017b11d88 +0158f347-bccd-4957-b998-daacf59af1cf 6b3b627a-9c70-42e4-bdc2-0a34516f249b +cb3423be-6bcd-4a78-9345-66ca2c0f5885 f653d21a-35ad-46b0-be38-906d86566947 +9f3c2570-6930-443d-a686-573244e4fcea 
82640b98-82b7-46bd-8da2-a396be0d9e48 +77502331-f470-4a6b-a824-52fd6dc793bb a4f9c43e-6b47-4413-8189-56c5718af9fa +27b0d3e8-67dc-4480-a1e8-717a7ee0f5b4 e18165f7-f535-432e-90ef-4e1df31f7dd6 +fd1756ee-f6fb-46d1-b8dc-89a4f23ee5f6 7f1bfba8-1f44-4e1b-a8ad-57f055e1dce2 +2c6131ab-e39e-4de6-aafe-bf303c65568d 3c412a22-84d3-4996-997a-176d51d9018b +a4f8167c-30b6-4d93-b407-81a0a46e0d55 6d41b851-a04c-4f75-84fd-9042f2b36f08 +90f9b25b-5576-47f2-ae4a-2ba92e586dd6 de7dbbbe-b0dd-426a-b66d-b88f5d85642a +6570262b-f9ad-4e6e-a5d9-29beae87a756 ec8ca05a-d43f-4af7-8b73-0756d4231201 +b73ca0e5-57c3-4d89-847d-b8c69c65fcce 6508b391-ed65-4a68-872c-95317e23088b +b449e15b-1116-404a-ae92-4b0ceed0003c 51649521-d7e6-4dfe-8438-7178caf3e035 +0cbe3104-0582-44ad-b181-685ba0f9e771 d88f69da-8b2c-4076-a575-7e5e3e27987b +1a08f5f0-8223-43f1-8c24-9ea554e26ebf 8db1d4e1-c999-4950-a924-3a0bd3b8ca62 +28526658-b269-4cd8-ac2d-b1876cb4d2eb 28526658-b269-4cd8-ac2d-b1876cb4d2eb 28526658-b269-4cd8-ac2d-b1876cb4d2eb +970ef9d3-6152-4c09-9d15-41e9466eb5f8 970ef9d3-6152-4c09-9d15-41e9466eb5f8 970ef9d3-6152-4c09-9d15-41e9466eb5f8 +c7866f83-b108-4bd6-842c-9c0373bb78eb c7866f83-b108-4bd6-842c-9c0373bb78eb c7866f83-b108-4bd6-842c-9c0373bb78eb +f1fa3307-2892-493b-a512-ad96ac1e7af0 f1fa3307-2892-493b-a512-ad96ac1e7af0 f1fa3307-2892-493b-a512-ad96ac1e7af0 +26d0b9b7-e285-4698-ac3d-9022a43ec327 26d0b9b7-e285-4698-ac3d-9022a43ec327 26d0b9b7-e285-4698-ac3d-9022a43ec327 +540e0837-cd08-454d-9750-c37333789ce7 540e0837-cd08-454d-9750-c37333789ce7 540e0837-cd08-454d-9750-c37333789ce7 +e7c41258-128a-4a3b-a83a-361962997650 e7c41258-128a-4a3b-a83a-361962997650 e7c41258-128a-4a3b-a83a-361962997650 +c73addc8-b81a-4102-afe4-19debaef5f13 c73addc8-b81a-4102-afe4-19debaef5f13 c73addc8-b81a-4102-afe4-19debaef5f13 +c6b7be30-df08-48fb-9181-7cd3446316d1 c6b7be30-df08-48fb-9181-7cd3446316d1 c6b7be30-df08-48fb-9181-7cd3446316d1 +1ab92b02-929d-4c33-8a32-4d9725af967d 1ab92b02-929d-4c33-8a32-4d9725af967d 1ab92b02-929d-4c33-8a32-4d9725af967d 
+07cf59d3-19fa-4844-b13f-5bb0579aa997 d4b545cb-628f-4274-8e33-da8efdafad5e b27a199a-8ad7-468a-82d8-5a9515bfc72b +34e555c7-8e27-4920-a4ec-33a8e82ac6d9 57e9eb17-edc4-4a66-9eac-640905d49fbd 55975cdc-ded4-4c29-8d7d-5cd2e40c26cc +700001d8-6fb5-4ce6-9f05-c2e11c95c65a 6101d5c3-3c8d-4d5e-8fb3-f03ff3d655d4 d8ba87c4-0c8c-435b-ab65-21bf5f1d8795 +d4ae32bf-6953-4dac-937c-e831e7b7ae44 ce7fe0e9-1a10-47f2-8f4b-0b81a714d565 efae9196-e000-4097-9770-6a17895b3cf6 +746d3b36-2b6f-49e3-9c13-9c8460ce3baf 32421cd4-aa84-4efd-9595-0fe16b1e37b5 71881fc4-bc88-4cf6-9355-b4d91eab0822 +715b41c7-e111-44f4-9122-f339886569e4 9c5def57-faf7-4f37-96aa-0a4a420fee39 51eb2c96-cfa5-4a9d-9f62-c305c0e331f8 +8acab48b-ffe1-4065-bb59-72ae019b29b7 2f2cdd40-bd22-484e-be1e-e084871f5cfe 1538129a-ff7f-490a-94e8-66e7a04dc57d +458c0e36-009b-4db7-b00b-55f58e5b8072 c2cffb5b-dc7d-47c2-b53c-6cad1e00f803 950194b8-c048-442c-a73f-f36a48665e83 +31f3f0f5-33fe-456b-bce5-ad58d94e00de 4c0f14f9-9976-4a66-8480-bfde5b9c392f f1806bfb-fce0-4b10-80d6-f0aaa0198c41 +962b1fe7-331d-4398-9cbd-bda028709e27 7413b5d2-95fc-4d66-a642-8009f4543c78 3dccbb14-e511-4113-aff9-0a12789c3800 diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql new file mode 100644 index 00000000000..99f6b1dda86 --- /dev/null +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql @@ -0,0 +1,19 @@ +select generateUUIDv4() from numbers(10); + +select generateUUIDv4(1) from numbers(10); + +select generateUUIDv4(2) from numbers(10); + +select generateUUIDv4(1), generateUUIDv4(1) from numbers(10); + +select generateUUIDv4(), generateUUIDv4() from numbers(10); + +select generateUUIDv4(1), generateUUIDv4() from numbers(10); + +select generateUUIDv4(), generateUUIDv4(1) from numbers(10); + +select generateUUIDv4(1), generateUUIDv4(2) from numbers(10); + +select generateUUIDv4(1), generateUUIDv4(1), generateUUIDv4(1) from numbers(10); + +select generateUUIDv4(1), 
generateUUIDv4(2), generateUUIDv4(3) from numbers(10); From c12f826d225f0b3a66287817c9d2a481f860fc79 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 21 May 2022 15:59:14 +0300 Subject: [PATCH 363/615] Implemented changing comment to a ReplicatedMergeTree table --- .../ReplicatedMergeTreeTableMetadata.cpp | 24 +++++++++++++++++-- .../ReplicatedMergeTreeTableMetadata.h | 6 ++++- src/Storages/StorageReplicatedMergeTree.cpp | 8 +++++++ ...2302_ReplicatedMergeTree_comment.reference | 3 +++ .../02302_ReplicatedMergeTree_comment.sql | 23 ++++++++++++++++++ 5 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference create mode 100644 tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 7dee7b8d0f8..393c2eb0dd1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -73,6 +73,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity_bytes = 0; constraints = metadata_snapshot->getConstraints().toString(); + comment = metadata_snapshot->comment; } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -108,6 +109,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!constraints.empty()) out << "constraints: " << constraints << "\n"; + + if (!comment.empty()) + out << "comment: " << quote << comment << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -155,8 +159,18 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) else index_granularity_bytes = 0; - if (checkString("constraints: ", in)) - in >> constraints >> "\n"; + String verb; + readStringUntilWhitespace(verb, in); + + if (verb == "constraints:") + { + in >> " " >> constraints >> 
"\n"; + + readStringUntilWhitespace(verb, in); + } + + if (verb == "comment:") + in >> " " >> quote >> comment >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -350,6 +364,12 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl diff.new_constraints = from_zk.constraints; } + if (comment != from_zk.comment) + { + diff.comment_changed = true; + diff.comment = from_zk.comment; + } + return diff; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 6d510d20304..246cf863d13 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -30,6 +30,7 @@ struct ReplicatedMergeTreeTableMetadata String projections; String constraints; String ttl_table; + String comment; UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; @@ -61,10 +62,13 @@ struct ReplicatedMergeTreeTableMetadata bool ttl_table_changed = false; String new_ttl_table; + bool comment_changed = false; + String comment; + bool empty() const { return !sorting_key_changed && !sampling_expression_changed && !skip_indices_changed && !projections_changed - && !ttl_table_changed && !constraints_changed; + && !ttl_table_changed && !constraints_changed && !comment_changed; } }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 36080485aca..a72866d1dde 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1151,6 +1151,9 @@ void StorageReplicatedMergeTree::setTableStructure( new_metadata.table_ttl = TTLTableDescription{}; } } + + if (metadata_diff.comment_changed) + new_metadata.comment = metadata_diff.comment; } /// Changes in columns may affect following metadata fields @@ -4776,6 +4779,11 @@ void StorageReplicatedMergeTree::alter( 
future_metadata_in_zk.ttl_table = ""; } + if (future_metadata.comment != current_metadata->comment) + { + future_metadata_in_zk.comment = future_metadata.comment; + } + String new_indices_str = future_metadata.secondary_indices.toString(); if (new_indices_str != current_metadata->secondary_indices.toString()) future_metadata_in_zk.skip_indices = new_indices_str; diff --git a/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference new file mode 100644 index 00000000000..ea14c4d69b4 --- /dev/null +++ b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.reference @@ -0,0 +1,3 @@ +Comment text for test table +Some new more detailed text of comment + diff --git a/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql new file mode 100644 index 00000000000..282c90d24bf --- /dev/null +++ b/tests/queries/0_stateless/02302_ReplicatedMergeTree_comment.sql @@ -0,0 +1,23 @@ +-- Validate that setting/removing and getting comments on ReplicatedMergeTree works +-- https://github.com/ClickHouse/ClickHouse/issues/36377 + + +CREATE TABLE 02302_ReplicatedMergeTree_comment +( + key UInt64 COMMENT 'The PK' +) +ENGINE = ReplicatedMergeTree('/test/02302_ReplicatedMergeTree_comment/{database}/source', '1') +PARTITION BY key +ORDER BY tuple() +COMMENT 'Comment text for test table'; + +# Check that comment is present +SELECT comment FROM system.tables WHERE database = currentDatabase() AND name == '02302_ReplicatedMergeTree_comment'; + +# Change to a different value and check if it was changed +ALTER TABLE 02302_ReplicatedMergeTree_comment MODIFY COMMENT 'Some new more detailed text of comment'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND name == '02302_ReplicatedMergeTree_comment'; + +# Remove the comment and check if it is empty now +ALTER TABLE 02302_ReplicatedMergeTree_comment MODIFY 
COMMENT ''; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND name == '02302_ReplicatedMergeTree_comment'; From d1678c96626b0bcf33a14c558966eec3c2c38dd7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 21 May 2022 23:43:26 +0300 Subject: [PATCH 364/615] Update ExpressionActions.cpp --- src/Interpreters/ExpressionActions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 373c09ddd3c..2da53a2e258 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -297,7 +297,7 @@ static std::unordered_set processShortCircuitFunctions short_circuit_nodes[&node] = short_circuit_settings; } - /// If there is no short-circuit functions, no need to do anything. + /// If there are no short-circuit functions, no need to do anything. if (short_circuit_nodes.empty()) return {}; From b3bc0a18a0a7d1725f09474704afdf211fa73980 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 00:13:01 +0200 Subject: [PATCH 365/615] fix test --- src/Disks/DiskObjectStorage.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 750a009ecf9..4d8efdea4cb 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -786,7 +786,11 @@ std::unique_ptr DiskObjectStorage::writeFile( [blob_name, count] (DiskObjectStorage::Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return object_storage->writeObject(fs::path(remote_fs_root_path) / blob_name, mode, object_attributes, std::move(create_metadata_callback), buf_size, settings); + /// We always use mode Rewrite because we simulate append using metadata and different files + return object_storage->writeObject( + fs::path(remote_fs_root_path) / blob_name, WriteMode::Rewrite, object_attributes, + std::move(create_metadata_callback), 
+ buf_size, settings); } From 15a76d012fc5bfd52945bbc648ee88cc5c3e6721 Mon Sep 17 00:00:00 2001 From: Memo Date: Sun, 22 May 2022 13:38:47 +0800 Subject: [PATCH 366/615] add NUMBER_OF_ARGUMENTS_DOESNT_MATCH defination --- src/Functions/generateUUIDv4.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index e8309ea7234..e4ecf5358f9 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + #define DECLARE_SEVERAL_IMPLEMENTATIONS(...) \ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) From 3a17180a4ffcb500111b92dc0a69574dcd6cf8d8 Mon Sep 17 00:00:00 2001 From: Memo Date: Sun, 22 May 2022 13:42:36 +0800 Subject: [PATCH 367/615] fix tests --- ...generate_multi_columns_with_uuid.reference | 103 +----------------- ...02310_generate_multi_columns_with_uuid.sql | 20 +--- 2 files changed, 6 insertions(+), 117 deletions(-) diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference index c6f93358b29..bb5ee5c21eb 100644 --- a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference @@ -1,100 +1,3 @@ -e2bb163f-34e6-4e9d-b034-ce007be68373 -8d29bbbd-1e08-4680-b5b0-cd246a93f609 -59daf935-7369-4e9c-8356-581b7f6fb533 -70ba5575-349c-4554-9db1-d8754c82d132 -b74ea91c-e15a-43d7-b459-f7dc5a981f7d -67f79647-9111-43c3-813c-60a040244e7f -d39ceea1-293c-4bf8-9efb-10ddef918ee4 -29b285bd-5a1c-49d7-b21a-ff3ec72b96fe -f90059a1-33fc-4b5b-95ee-68ed44875c2a -394164c6-7a5d-4c2f-be50-e34a8cd9e619 -b76f8ccb-9ec1-49eb-912f-c432cb4e0433 -fd211b9a-6aaa-4ad1-b720-9f91db6c89e7 -264ad5d0-5bd5-456d-a3d5-8ae9bccf532d -29063f15-c68e-4c34-8206-613a64944d62 
-21557fee-5cc9-4b69-9d87-c66b93e78ba9 -5ae7c2d4-191d-41fc-8b9a-3c7365385536 -62829100-2e50-43b6-af28-e4b69a9c7046 -620eaf2a-22ba-4f63-8cea-a2abcf9d72ce -41663a3d-8006-4aea-9000-e8d56b4352fb -5e3322e8-02ee-4742-b344-83cc6d661525 -4ab46734-b75e-4cd6-b65c-f22e2cd91b48 -9099183c-d557-43d7-8ed6-657baee076cc -9c140710-1b03-4485-a746-b66003fb0842 -6aaf2122-4bdb-4179-852e-bc256235ca45 -9354a012-2b3c-486e-910f-0ec7f3415030 -87700127-b7db-4d95-bfd8-ce6f5938a2b7 -45b4f61d-f24d-43ab-822a-6b1926d6bbfc -cd3db3d5-fb0d-4de1-9cc3-d5a910c5f1d0 -96af3bc4-74f5-428d-8b1f-dca55b0c87cf -6ba5e92c-f48d-4bb5-a0c7-e14bcf679d78 -7580efeb-1717-4c0c-af1f-d11eb9f92d92 7580efeb-1717-4c0c-af1f-d11eb9f92d92 -2ec4be1a-bf75-4bf3-bc5f-56a55e90e569 2ec4be1a-bf75-4bf3-bc5f-56a55e90e569 -430c2fe5-0a31-4710-b39f-b6c221503bd1 430c2fe5-0a31-4710-b39f-b6c221503bd1 -94cc855c-54d7-4e03-bc57-fa90c1f23277 94cc855c-54d7-4e03-bc57-fa90c1f23277 -fe5c31f5-d249-4d3e-8b88-d4aa61fc3e21 fe5c31f5-d249-4d3e-8b88-d4aa61fc3e21 -7ddcc84d-3cfd-4793-8afc-b07f6ff06291 7ddcc84d-3cfd-4793-8afc-b07f6ff06291 -1d9fa2d5-4b6c-4219-8300-768e19978ca3 1d9fa2d5-4b6c-4219-8300-768e19978ca3 -213ec099-6a61-4e46-a90b-b31417393a6a 213ec099-6a61-4e46-a90b-b31417393a6a -d01dda38-ba5e-4b89-ad5f-96cd67040530 d01dda38-ba5e-4b89-ad5f-96cd67040530 -3697424a-a4e2-465b-8c0b-39255d0b0e73 3697424a-a4e2-465b-8c0b-39255d0b0e73 -ab301df3-1365-4b00-b4f6-24e6bb7026d6 ab301df3-1365-4b00-b4f6-24e6bb7026d6 -d841d809-ac28-41ac-8333-a7cb94d3baf0 d841d809-ac28-41ac-8333-a7cb94d3baf0 -32d02bfb-de35-4d87-8b5d-f39c1958a62a 32d02bfb-de35-4d87-8b5d-f39c1958a62a -6b8a8bf1-1a74-494b-842c-2398763620d3 6b8a8bf1-1a74-494b-842c-2398763620d3 -aecd5210-a235-4804-995a-1a1d30e574fe aecd5210-a235-4804-995a-1a1d30e574fe -cfb2e0d0-0c2f-4211-9c4c-704a59f3655b cfb2e0d0-0c2f-4211-9c4c-704a59f3655b -e124f4ec-58df-4bdd-b5d0-67a36f72f018 e124f4ec-58df-4bdd-b5d0-67a36f72f018 -defda77b-dcf1-4e6f-b3a9-53668562c852 defda77b-dcf1-4e6f-b3a9-53668562c852 -0fe4d450-7abb-4972-b66d-064822cfdfc0 
0fe4d450-7abb-4972-b66d-064822cfdfc0 -7d9f4ec9-bcb6-4f5a-b602-3de1dd2d8f66 7d9f4ec9-bcb6-4f5a-b602-3de1dd2d8f66 -084451c1-8e55-49db-b6ef-caa8b4a9cb4a 13118c86-100a-4b5d-afd2-75b0f8fc4c99 -a53c9325-d821-44e3-9dee-2c551a79cedc 44ccc4a5-6873-4f07-9ce6-be86e33543c3 -de4ce7bb-753a-4a84-92b3-2e65a84deefd e6fe9df5-b4b4-40a4-bf83-9b745a28d6d0 -5c493fda-5aff-4d49-bccf-93c9b90b17de c6872ee1-9131-43bb-8d51-a29cf8a8cb6f -e98fd48e-36f5-4fe6-b20a-012cfdf8ec8d 61c4f2b3-7d7e-427d-8dac-1f488916bc9a -121f5fb5-afc4-4c25-8f13-b3c5d6ef3ea8 b2e12d1a-6872-45a0-9582-9a2761d937c6 -7cebfef1-aa83-4ff5-aa11-3f9521a72899 f7e6db87-28bc-4d76-a6ea-7981fbd7aebd -6637d6b5-ac02-4ace-8e0e-0ed378722926 28891a8d-c9da-4030-ade2-5a9973739bb0 -1b87e644-cbcd-449b-9fbe-5808cde60310 4d74060c-2535-4901-ac15-8b91c17ac609 -b9f8fa5e-a8bb-4b88-91d5-c3e5f45b8b88 db61ac3c-c259-40d2-aa02-477e12b8f2d2 -506a9df5-deed-4c29-8458-ace61436593d 72d234c4-765d-4742-bc22-edca3045d30f -0b5e27dc-4cb7-411f-823c-9ffa49b811f7 f874258c-4864-4db8-8c05-ce1f34c2b398 -ce1ad114-8e62-46e1-a3e3-0497db659211 1b274e59-12eb-456c-bdfa-cb8794bee7f6 -c4b2111b-63e6-4cdf-8b46-7962665e2221 5f027848-b3bb-438d-8e04-e167d5fb6e1d -73e0b973-b912-4641-ba24-c43cc0d1c123 a48badd5-4289-4de0-8a20-eb6b8b027787 -00f13861-4e0e-4745-8adf-2eda884af332 1d50b1ee-89fe-4988-9554-060017b11d88 -0158f347-bccd-4957-b998-daacf59af1cf 6b3b627a-9c70-42e4-bdc2-0a34516f249b -cb3423be-6bcd-4a78-9345-66ca2c0f5885 f653d21a-35ad-46b0-be38-906d86566947 -9f3c2570-6930-443d-a686-573244e4fcea 82640b98-82b7-46bd-8da2-a396be0d9e48 -77502331-f470-4a6b-a824-52fd6dc793bb a4f9c43e-6b47-4413-8189-56c5718af9fa -27b0d3e8-67dc-4480-a1e8-717a7ee0f5b4 e18165f7-f535-432e-90ef-4e1df31f7dd6 -fd1756ee-f6fb-46d1-b8dc-89a4f23ee5f6 7f1bfba8-1f44-4e1b-a8ad-57f055e1dce2 -2c6131ab-e39e-4de6-aafe-bf303c65568d 3c412a22-84d3-4996-997a-176d51d9018b -a4f8167c-30b6-4d93-b407-81a0a46e0d55 6d41b851-a04c-4f75-84fd-9042f2b36f08 -90f9b25b-5576-47f2-ae4a-2ba92e586dd6 de7dbbbe-b0dd-426a-b66d-b88f5d85642a 
-6570262b-f9ad-4e6e-a5d9-29beae87a756 ec8ca05a-d43f-4af7-8b73-0756d4231201 -b73ca0e5-57c3-4d89-847d-b8c69c65fcce 6508b391-ed65-4a68-872c-95317e23088b -b449e15b-1116-404a-ae92-4b0ceed0003c 51649521-d7e6-4dfe-8438-7178caf3e035 -0cbe3104-0582-44ad-b181-685ba0f9e771 d88f69da-8b2c-4076-a575-7e5e3e27987b -1a08f5f0-8223-43f1-8c24-9ea554e26ebf 8db1d4e1-c999-4950-a924-3a0bd3b8ca62 -28526658-b269-4cd8-ac2d-b1876cb4d2eb 28526658-b269-4cd8-ac2d-b1876cb4d2eb 28526658-b269-4cd8-ac2d-b1876cb4d2eb -970ef9d3-6152-4c09-9d15-41e9466eb5f8 970ef9d3-6152-4c09-9d15-41e9466eb5f8 970ef9d3-6152-4c09-9d15-41e9466eb5f8 -c7866f83-b108-4bd6-842c-9c0373bb78eb c7866f83-b108-4bd6-842c-9c0373bb78eb c7866f83-b108-4bd6-842c-9c0373bb78eb -f1fa3307-2892-493b-a512-ad96ac1e7af0 f1fa3307-2892-493b-a512-ad96ac1e7af0 f1fa3307-2892-493b-a512-ad96ac1e7af0 -26d0b9b7-e285-4698-ac3d-9022a43ec327 26d0b9b7-e285-4698-ac3d-9022a43ec327 26d0b9b7-e285-4698-ac3d-9022a43ec327 -540e0837-cd08-454d-9750-c37333789ce7 540e0837-cd08-454d-9750-c37333789ce7 540e0837-cd08-454d-9750-c37333789ce7 -e7c41258-128a-4a3b-a83a-361962997650 e7c41258-128a-4a3b-a83a-361962997650 e7c41258-128a-4a3b-a83a-361962997650 -c73addc8-b81a-4102-afe4-19debaef5f13 c73addc8-b81a-4102-afe4-19debaef5f13 c73addc8-b81a-4102-afe4-19debaef5f13 -c6b7be30-df08-48fb-9181-7cd3446316d1 c6b7be30-df08-48fb-9181-7cd3446316d1 c6b7be30-df08-48fb-9181-7cd3446316d1 -1ab92b02-929d-4c33-8a32-4d9725af967d 1ab92b02-929d-4c33-8a32-4d9725af967d 1ab92b02-929d-4c33-8a32-4d9725af967d -07cf59d3-19fa-4844-b13f-5bb0579aa997 d4b545cb-628f-4274-8e33-da8efdafad5e b27a199a-8ad7-468a-82d8-5a9515bfc72b -34e555c7-8e27-4920-a4ec-33a8e82ac6d9 57e9eb17-edc4-4a66-9eac-640905d49fbd 55975cdc-ded4-4c29-8d7d-5cd2e40c26cc -700001d8-6fb5-4ce6-9f05-c2e11c95c65a 6101d5c3-3c8d-4d5e-8fb3-f03ff3d655d4 d8ba87c4-0c8c-435b-ab65-21bf5f1d8795 -d4ae32bf-6953-4dac-937c-e831e7b7ae44 ce7fe0e9-1a10-47f2-8f4b-0b81a714d565 efae9196-e000-4097-9770-6a17895b3cf6 -746d3b36-2b6f-49e3-9c13-9c8460ce3baf 
32421cd4-aa84-4efd-9595-0fe16b1e37b5 71881fc4-bc88-4cf6-9355-b4d91eab0822 -715b41c7-e111-44f4-9122-f339886569e4 9c5def57-faf7-4f37-96aa-0a4a420fee39 51eb2c96-cfa5-4a9d-9f62-c305c0e331f8 -8acab48b-ffe1-4065-bb59-72ae019b29b7 2f2cdd40-bd22-484e-be1e-e084871f5cfe 1538129a-ff7f-490a-94e8-66e7a04dc57d -458c0e36-009b-4db7-b00b-55f58e5b8072 c2cffb5b-dc7d-47c2-b53c-6cad1e00f803 950194b8-c048-442c-a73f-f36a48665e83 -31f3f0f5-33fe-456b-bce5-ad58d94e00de 4c0f14f9-9976-4a66-8480-bfde5b9c392f f1806bfb-fce0-4b10-80d6-f0aaa0198c41 -962b1fe7-331d-4398-9cbd-bda028709e27 7413b5d2-95fc-4d66-a642-8009f4543c78 3dccbb14-e511-4113-aff9-0a12789c3800 +0 +0 +1 diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql index 99f6b1dda86..05f0b7141e6 100644 --- a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql @@ -1,19 +1,5 @@ -select generateUUIDv4() from numbers(10); +SELECT generateUUIDv4(1) = generateUUIDv4(2); -select generateUUIDv4(1) from numbers(10); +SELECT generateUUIDv4() = generateUUIDv4(1); -select generateUUIDv4(2) from numbers(10); - -select generateUUIDv4(1), generateUUIDv4(1) from numbers(10); - -select generateUUIDv4(), generateUUIDv4() from numbers(10); - -select generateUUIDv4(1), generateUUIDv4() from numbers(10); - -select generateUUIDv4(), generateUUIDv4(1) from numbers(10); - -select generateUUIDv4(1), generateUUIDv4(2) from numbers(10); - -select generateUUIDv4(1), generateUUIDv4(1), generateUUIDv4(1) from numbers(10); - -select generateUUIDv4(1), generateUUIDv4(2), generateUUIDv4(3) from numbers(10); +SELECT generateUUIDv4(1) = generateUUIDv4(1); \ No newline at end of file From aa503f699af19bb7e13e15295aa7257b12ef5ad7 Mon Sep 17 00:00:00 2001 From: Memo Date: Sun, 22 May 2022 13:43:05 +0800 Subject: [PATCH 368/615] fix tests --- 
.../0_stateless/02310_generate_multi_columns_with_uuid.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql index 05f0b7141e6..3ab19446b3e 100644 --- a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql @@ -2,4 +2,4 @@ SELECT generateUUIDv4(1) = generateUUIDv4(2); SELECT generateUUIDv4() = generateUUIDv4(1); -SELECT generateUUIDv4(1) = generateUUIDv4(1); \ No newline at end of file +SELECT generateUUIDv4(1) = generateUUIDv4(1); From 5f84f06d6d26672da3d97d0b236ebb46b5080989 Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 22 May 2022 10:13:16 +0000 Subject: [PATCH 369/615] simplify windowview --- src/Core/Defines.h | 2 - src/Core/Settings.h | 5 +- src/Storages/WindowView/StorageWindowView.cpp | 48 ++++--------------- src/Storages/WindowView/StorageWindowView.h | 1 + 4 files changed, 13 insertions(+), 43 deletions(-) diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 4ff48b8ff63..9665a20a397 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -33,8 +33,6 @@ #define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5 #define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60 -#define DEFAULT_WINDOW_VIEW_CLEAN_INTERVAL_SEC 5 -#define DEFAULT_WINDOW_VIEW_HEARTBEAT_INTERVAL_SEC 15 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 /// each period reduces the error counter by 2 times diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2038b385b16..2246e757efa 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -435,8 +435,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ M(UInt64, 
max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ - M(Seconds, window_view_clean_interval, DEFAULT_WINDOW_VIEW_CLEAN_INTERVAL_SEC, "The clean interval of window view in seconds to free outdated data.", 0) \ - M(Seconds, window_view_heartbeat_interval, DEFAULT_WINDOW_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ + M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \ diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 119f23b65f2..a44b8954e3c 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -272,40 +272,13 @@ namespace } }; - IntervalKind strToIntervalKind(const String& interval_str) - { - if (interval_str == "Nanosecond") - return IntervalKind::Nanosecond; - else if (interval_str == "Microsecond") - return IntervalKind::Microsecond; - else if (interval_str == "Millisecond") - return IntervalKind::Millisecond; - else if (interval_str == "Second") - return IntervalKind::Second; - else if (interval_str == "Minute") - return IntervalKind::Minute; - else if (interval_str == "Hour") - 
return IntervalKind::Hour; - else if (interval_str == "Day") - return IntervalKind::Day; - else if (interval_str == "Week") - return IntervalKind::Week; - else if (interval_str == "Month") - return IntervalKind::Month; - else if (interval_str == "Quarter") - return IntervalKind::Quarter; - else if (interval_str == "Year") - return IntervalKind::Year; - __builtin_unreachable(); - } - void extractWindowArgument(const ASTPtr & ast, IntervalKind::Kind & kind, Int64 & num_units, String err_msg) { const auto * arg = ast->as(); - if (!arg || !startsWith(arg->name, "toInterval")) + if (!arg || !startsWith(arg->name, "toInterval") + || !IntervalKind::tryParseString(Poco::toLower(arg->name.substr(10)), kind)) throw Exception(err_msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - kind = strToIntervalKind(arg->name.substr(10)); const auto * interval_unit = arg->children.front()->children.front()->as(); if (!interval_unit || (interval_unit->value.getType() != Field::Types::String @@ -1061,7 +1034,7 @@ void StorageWindowView::threadFuncCleanup() } if (!shutdown_called) - clean_cache_task->scheduleAfter(1000); + clean_cache_task->scheduleAfter(clean_interval_ms); } void StorageWindowView::threadFuncFireProc() @@ -1102,7 +1075,7 @@ void StorageWindowView::threadFuncFireEvent() std::unique_lock lock(fire_signal_mutex); while (!shutdown_called) { - bool signaled = std::cv_status::no_timeout == fire_signal_condition.wait_for(lock, std::chrono::seconds(5)); + bool signaled = std::cv_status::no_timeout == fire_signal_condition.wait_for(lock, std::chrono::seconds(fire_signal_timeout_s)); if (!signaled) continue; @@ -1229,6 +1202,7 @@ StorageWindowView::StorageWindowView( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) + , fire_signal_timeout_s(context_->getSettingsRef().wait_for_window_view_fire_signal_timeout.totalSeconds()) , 
clean_interval_ms(context_->getSettingsRef().window_view_clean_interval.totalMilliseconds()) { if (!query.select) @@ -1497,14 +1471,10 @@ void StorageWindowView::writeIntoWindowView( if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { - ASTPtr args = std::make_shared(); - args->children.push_back(std::make_shared(window_view.timestamp_column_name)); - args->children.push_back(std::make_shared(lateness_bound)); - - auto filter_function = std::make_shared(); - filter_function->name = "greaterOrEquals"; - filter_function->arguments = args; - filter_function->children.push_back(filter_function->arguments); + auto filter_function = makeASTFunction( + "greaterOrEquals", + std::make_shared(window_view.timestamp_column_name), + std::make_shared(lateness_bound)); ASTPtr query = filter_function; NamesAndTypesList columns; diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 709ffd9c58e..310665448d1 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -203,6 +203,7 @@ private: bool inner_target_table{false}; mutable Block input_header; mutable Block output_header; + UInt64 fire_signal_timeout_s; UInt64 clean_interval_ms; const DateLUTImpl * time_zone = nullptr; UInt32 max_timestamp = 0; From eb69d963e27b4f00d7d689a46d177a5103890dc8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 12:16:50 +0200 Subject: [PATCH 370/615] Missed change --- src/Disks/DiskObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/DiskObjectStorage.cpp index 4d8efdea4cb..89eb5d4a63d 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/DiskObjectStorage.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes extern const int INCORRECT_DISK_INDEX; extern const int UNKNOWN_FORMAT; extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED;; + extern const 
int PATH_ACCESS_DENIED; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; extern const int MEMORY_LIMIT_EXCEEDED; From fcd317a64264b690cedea6697194d0fa1378abdc Mon Sep 17 00:00:00 2001 From: Vxider Date: Sun, 22 May 2022 10:18:34 +0000 Subject: [PATCH 371/615] update windowview create syntax --- src/Parsers/ParserCreateQuery.h | 2 +- src/Storages/WindowView/StorageWindowView.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 6a34e1d2700..29cd08554b5 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -419,7 +419,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [INNER ENGINE [db.]name] [ENGINE [db.]name] [WATERMARK function] [ALLOWED_LATENESS = interval_function] [POPULATE] AS SELECT ... +/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] AS SELECT ... class ParserCreateWindowViewQuery : public IParserBase { protected: diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 310665448d1..d9343aa03ac 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -18,8 +18,9 @@ using ASTPtr = std::shared_ptr; * StorageWindowView. * * CREATE WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] - * [ENGINE [db.]name] + * [INNER ENGINE engine] [ENGINE engine] * [WATERMARK strategy] [ALLOWED_LATENESS interval_function] + * [POPULATE] * AS SELECT ... 
* GROUP BY [tumble/hop(...)] * From dff8c0e4f06273bcb9e02c190746d89c931ec595 Mon Sep 17 00:00:00 2001 From: Ilya Strukov Date: Sun, 22 May 2022 12:19:18 +0200 Subject: [PATCH 372/615] Fix a tiny wording issue Replace "there are another ways" with "there are other ways" --- docs/en/getting-started/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index f31a78bc1c4..12775749a25 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -238,7 +238,7 @@ To start the server as a daemon, run: $ sudo clickhouse start ``` -There are also another ways to run ClickHouse: +There are also other ways to run ClickHouse: ``` bash $ sudo service clickhouse-server start From 06c3dd69c002342f60c02bd5a870b1ba7cbf7044 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 13:51:48 +0200 Subject: [PATCH 373/615] Move directories --- src/CMakeLists.txt | 7 +- src/Disks/DiskWebServer.cpp | 3 +- src/Disks/IDisk.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 2 +- .../AzureBlobStorage/AzureBlobStorageAuth.h | 2 +- .../AzureBlobStorage}/AzureObjectStorage.cpp | 5 +- .../AzureBlobStorage}/AzureObjectStorage.h | 2 +- .../registerDiskAzureBlobStorage.cpp | 7 +- .../DiskObjectStorage.cpp | 522 +---------------- .../{ => ObjectStorages}/DiskObjectStorage.h | 66 +-- .../DiskObjectStorageMetadataHelper.cpp | 541 ++++++++++++++++++ .../DiskObjectStorageMetadataHelper.h | 77 +++ .../HDFS}/HDFSObjectStorage.cpp | 4 +- .../HDFS}/HDFSObjectStorage.h | 2 +- .../HDFS/registerDiskHDFS.cpp | 4 +- .../{ => ObjectStorages}/IObjectStorage.cpp | 3 +- .../{ => ObjectStorages}/IObjectStorage.h | 26 +- .../S3/ProxyConfiguration.h | 0 .../S3/ProxyListConfiguration.cpp | 0 .../S3/ProxyListConfiguration.h | 0 .../S3/ProxyResolverConfiguration.cpp | 0 .../S3/ProxyResolverConfiguration.h | 0 .../S3}/S3ObjectStorage.cpp | 
6 +- .../{ => ObjectStorages/S3}/S3ObjectStorage.h | 2 +- .../{ => ObjectStorages}/S3/diskSettings.cpp | 2 +- .../{ => ObjectStorages}/S3/diskSettings.h | 8 +- .../{ => ObjectStorages}/S3/parseConfig.h | 0 .../S3/registerDiskS3.cpp | 13 +- src/Interpreters/Context.cpp | 2 +- 30 files changed, 689 insertions(+), 621 deletions(-) rename src/Disks/{ => ObjectStorages}/AzureBlobStorage/AzureBlobStorageAuth.cpp (98%) rename src/Disks/{ => ObjectStorages}/AzureBlobStorage/AzureBlobStorageAuth.h (87%) rename src/Disks/{ => ObjectStorages/AzureBlobStorage}/AzureObjectStorage.cpp (98%) rename src/Disks/{ => ObjectStorages/AzureBlobStorage}/AzureObjectStorage.h (98%) rename src/Disks/{ => ObjectStorages}/AzureBlobStorage/registerDiskAzureBlobStorage.cpp (95%) rename src/Disks/{ => ObjectStorages}/DiskObjectStorage.cpp (57%) rename src/Disks/{ => ObjectStorages}/DiskObjectStorage.h (79%) create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h rename src/Disks/{ => ObjectStorages/HDFS}/HDFSObjectStorage.cpp (98%) rename src/Disks/{ => ObjectStorages/HDFS}/HDFSObjectStorage.h (98%) rename src/Disks/{ => ObjectStorages}/HDFS/registerDiskHDFS.cpp (94%) rename src/Disks/{ => ObjectStorages}/IObjectStorage.cpp (96%) rename src/Disks/{ => ObjectStorages}/IObjectStorage.h (74%) rename src/Disks/{ => ObjectStorages}/S3/ProxyConfiguration.h (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyListConfiguration.cpp (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyListConfiguration.h (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyResolverConfiguration.cpp (100%) rename src/Disks/{ => ObjectStorages}/S3/ProxyResolverConfiguration.h (100%) rename src/Disks/{ => ObjectStorages/S3}/S3ObjectStorage.cpp (99%) rename src/Disks/{ => ObjectStorages/S3}/S3ObjectStorage.h (99%) rename src/Disks/{ => ObjectStorages}/S3/diskSettings.cpp (99%) rename src/Disks/{ => 
ObjectStorages}/S3/diskSettings.h (74%) rename src/Disks/{ => ObjectStorages}/S3/parseConfig.h (100%) rename src/Disks/{ => ObjectStorages}/S3/registerDiskS3.cpp (91%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8f6e894a100..10bdc464ac6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -87,6 +87,7 @@ add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) add_headers_and_sources(dbms Disks/IO) +add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() @@ -113,16 +114,16 @@ endif() if (TARGET ch_contrib::aws_s3) add_headers_and_sources(dbms Common/S3) - add_headers_and_sources(dbms Disks/S3) + add_headers_and_sources(dbms Disks/ObjectStorages/S3) endif() if (TARGET ch_contrib::azure_sdk) - add_headers_and_sources(dbms Disks/AzureBlobStorage) + add_headers_and_sources(dbms Disks/ObjectStorages/AzureBlobStorage) endif() if (TARGET ch_contrib::hdfs) add_headers_and_sources(dbms Storages/HDFS) - add_headers_and_sources(dbms Disks/HDFS) + add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() add_headers_and_sources(dbms Storages/Cache) diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 4f1fc1ad8fb..b8a0d12d6c1 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -10,12 +10,11 @@ #include #include -#include +#include #include #include - #include #include #include diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index ab9f7abae1c..ce6cc84c3f3 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index e7eb6296a19..eb6d26a4c15 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -3,7 +3,7 
@@ #include #include #include -#include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp similarity index 98% rename from src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index c078f584a09..3dcdd47826f 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h similarity index 87% rename from src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h index fcd4fd51b49..6ebe169af50 100644 --- a/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h @@ -5,7 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include namespace DB { diff --git a/src/Disks/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp similarity index 98% rename from src/Disks/AzureObjectStorage.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 75a602760a7..4ea7c609a51 100644 --- a/src/Disks/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE @@ -6,7 +6,8 @@ #include #include #include -#include + +#include namespace DB { diff --git a/src/Disks/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h similarity index 98% rename from src/Disks/AzureObjectStorage.h rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index da6393fd55d..9012449e284 100644 --- 
a/src/Disks/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include diff --git a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp similarity index 95% rename from src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index e111406a587..faaec6ee95c 100644 --- a/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -7,9 +7,10 @@ #include #include #include -#include -#include -#include +#include + +#include +#include namespace DB { diff --git a/src/Disks/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp similarity index 57% rename from src/Disks/DiskObjectStorage.cpp rename to src/Disks/ObjectStorages/DiskObjectStorage.cpp index 89eb5d4a63d..b1a396ffee6 100644 --- a/src/Disks/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -14,7 +14,7 @@ #include #include #include - +#include #include namespace DB @@ -858,522 +858,4 @@ DiskObjectStorageReservation::~DiskObjectStorageReservation() } -void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const -{ - const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; - auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); - buf->write('0'); - buf->finalize(); -} - -void DiskObjectStorageMetadataHelper::findLastRevision() -{ - /// Construct revision number from high to low bits. 
- String revision; - revision.reserve(64); - for (int bit = 0; bit < 64; ++bit) - { - auto revision_prefix = revision + "1"; - - LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); - - /// Check file or operation with such revision prefix exists. - if (disk->object_storage->exists(disk->remote_fs_root_path + "r" + revision_prefix) - || disk->object_storage->exists(disk->remote_fs_root_path + "operations/r" + revision_prefix)) - revision += "1"; - else - revision += "0"; - } - revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); - LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); -} - -int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) -{ - const std::string path = source_path + SCHEMA_VERSION_OBJECT; - int version = 0; - if (!object_storage->exists(path)) - return version; - - auto buf = object_storage->readObject(path); - readIntText(version, *buf); - - return version; -} - -void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) const -{ - auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; - - auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite); - writeIntText(version, *buf); - buf->finalize(); - -} - -void DiskObjectStorageMetadataHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const -{ - disk->object_storage->copyObject(key, key, metadata); -} - -void DiskObjectStorageMetadataHelper::migrateFileToRestorableSchema(const String & path) const -{ - LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_disk->getPath() + path); - - auto meta = disk->readMetadata(path); - - for (const auto & [key, _] : meta.remote_fs_objects) - { - ObjectAttributes metadata { - {"path", path} - }; - updateObjectMetadata(disk->remote_fs_root_path + key, metadata); - } -} -void 
DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) -{ - checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. - - LOG_TRACE(disk->log, "Migrate directory {} to restorable schema", disk->metadata_disk->getPath() + path); - - bool dir_contains_only_files = true; - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - { - if (disk->isDirectory(it->path())) - { - dir_contains_only_files = false; - break; - } - } - - /// The whole directory can be migrated asynchronously. - if (dir_contains_only_files) - { - auto result = disk->getExecutor().execute([this, path] - { - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - migrateFileToRestorableSchema(it->path()); - }); - - results.push_back(std::move(result)); - } - else - { - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - if (!disk->isDirectory(it->path())) - { - auto source_path = it->path(); - auto result = disk->getExecutor().execute([this, source_path] - { - migrateFileToRestorableSchema(source_path); - }); - - results.push_back(std::move(result)); - } - else - migrateToRestorableSchemaRecursive(it->path(), results); - } - -} - -void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() -{ - try - { - LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); - - Futures results; - - for (const auto & root : data_roots) - if (disk->exists(root)) - migrateToRestorableSchemaRecursive(root + '/', results); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - } - catch (const Exception &) - { - tryLogCurrentException(disk->log, fmt::format("Failed to migrate to restorable schema for disk {}", disk->name)); - - throw; - } -} - -void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const 
std::string & config_prefix, ContextPtr context) -{ - LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name); - - if (!disk->exists(RESTORE_FILE_NAME)) - { - LOG_INFO(disk->log, "No restore file '{}' exists, finishing restore", RESTORE_FILE_NAME); - return; - } - - try - { - RestoreInformation information; - information.source_path = disk->remote_fs_root_path; - information.source_namespace = disk->object_storage->getObjectsNamespace(); - - readRestoreInformation(information); - if (information.revision == 0) - information.revision = LATEST_REVISION; - if (!information.source_path.ends_with('/')) - information.source_path += '/'; - - IObjectStorage * source_object_storage = disk->object_storage.get(); - if (information.source_namespace == disk->object_storage->getObjectsNamespace()) - { - /// In this case we need to additionally cleanup S3 from objects with later revision. - /// Will be simply just restore to different path. - if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) - throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); - - /// This case complicates S3 cleanup in case of unsuccessful restore. - if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) - throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); - } - else - { - object_storage_from_another_namespace = disk->object_storage->cloneObjectStorage(information.source_namespace, config, config_prefix, context); - source_object_storage = object_storage_from_another_namespace.get(); - } - - LOG_INFO(disk->log, "Starting to restore disk {}. 
Revision: {}, Source path: {}", - disk->name, information.revision, information.source_path); - - if (readSchemaVersion(source_object_storage, information.source_path) < RESTORABLE_SCHEMA_VERSION) - throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); - - LOG_INFO(disk->log, "Removing old metadata..."); - - bool cleanup_s3 = information.source_path != disk->remote_fs_root_path; - for (const auto & root : data_roots) - if (disk->exists(root)) - disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); - - LOG_INFO(disk->log, "Old metadata removed, restoring new one"); - restoreFiles(source_object_storage, information); - restoreFileOperations(source_object_storage, information); - - disk->metadata_disk->removeFile(RESTORE_FILE_NAME); - - saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); - - LOG_INFO(disk->log, "Restore disk {} finished", disk->name); - } - catch (const Exception &) - { - tryLogCurrentException(disk->log, fmt::format("Failed to restore disk {}", disk->name)); - - throw; - } -} - -void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) /// NOLINT -{ - auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); - buffer->next(); - - try - { - std::map properties; - - while (buffer->hasPendingData()) - { - String property; - readText(property, *buffer); - assertChar('\n', *buffer); - - auto pos = property.find('='); - if (pos == std::string::npos || pos == 0 || pos == property.length()) - throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); - - auto key = property.substr(0, pos); - auto value = property.substr(pos + 1); - - auto it = properties.find(key); - if (it != properties.end()) - throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - - properties[key] = value; - } - - for (const auto & [key, value] : properties) - { - 
ReadBufferFromString value_buffer(value); - - if (key == "revision") - readIntText(restore_information.revision, value_buffer); - else if (key == "source_bucket" || key == "source_namespace") - readText(restore_information.source_namespace, value_buffer); - else if (key == "source_path") - readText(restore_information.source_path, value_buffer); - else if (key == "detached") - readBoolTextWord(restore_information.detached, value_buffer); - else - throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); - } - } - catch (const Exception &) - { - tryLogCurrentException(disk->log, "Failed to read restore information"); - throw; - } -} - -static String shrinkKey(const String & path, const String & key) -{ - if (!key.starts_with(path)) - throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); - - return key.substr(path.length()); -} - -static std::tuple extractRevisionAndOperationFromKey(const String & key) -{ - String revision_str; - String operation; - /// Key has format: ../../r{revision}-{operation} - static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; - - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); - - return {(revision_str.empty() ? 0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; -} - -void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) -{ - LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); - - std::vector> results; - auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) - { - std::vector keys_names; - for (const auto & [key, size] : keys) - { - - LOG_INFO(disk->log, "Calling restore for key for disk {}", key); - - /// Skip file operations objects. They will be processed separately. 
- if (key.find("/operations/") != String::npos) - continue; - - const auto [revision, _] = extractRevisionAndOperationFromKey(key); - /// Filter early if it's possible to get revision from key. - if (revision > restore_information.revision) - continue; - - keys_names.push_back(key); - } - - if (!keys_names.empty()) - { - auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() - { - processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); - }); - - results.push_back(std::move(result)); - } - - return true; - }; - - BlobsPathToSize children; - source_object_storage->listPrefix(restore_information.source_path, children); - - restore_files(children); - - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); - - LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); - -} - -void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const -{ - for (const auto & key : keys) - { - auto meta = source_object_storage->getObjectMetadata(key); - auto object_attributes = meta.attributes; - - String path; - if (object_attributes.has_value()) - { - /// Restore file if object has 'path' in metadata. - auto path_entry = object_attributes->find("path"); - if (path_entry == object_attributes->end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - path = path_entry->second; - } - else - continue; - - - disk->createDirectories(directoryPath(path)); - auto relative_key = shrinkKey(source_path, key); - - /// Copy object if we restore to different bucket / path. 
- if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != source_path) - source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); - - auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) - { - metadata.addObject(relative_key, meta.size_bytes); - return true; - }; - - disk->createUpdateAndStoreMetadata(path, false, updater); - - LOG_TRACE(disk->log, "Restored file {}", path); - } - -} - -void DiskObjectStorage::onFreeze(const String & path) -{ - createDirectories(path); - auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); - writeIntText(metadata_helper->revision_counter.load(), *revision_file_buf); - revision_file_buf->finalize(); -} - -static String pathToDetached(const String & source_path) -{ - if (source_path.ends_with('/')) - return fs::path(source_path).parent_path().parent_path() / "detached/"; - return fs::path(source_path).parent_path() / "detached/"; -} - -void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) -{ - /// Enable recording file operations if we restore to different bucket / path. 
- bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; - - std::set renames; - auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) - { - const String rename = "rename"; - const String hardlink = "hardlink"; - - for (const auto & [key, _]: keys) - { - const auto [revision, operation] = extractRevisionAndOperationFromKey(key); - if (revision == UNKNOWN_REVISION) - { - LOG_WARNING(disk->log, "Skip key {} with unknown revision", key); - continue; - } - - /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). - /// We can stop processing if revision of the object is already more than required. - if (revision > restore_information.revision) - return false; - - /// Keep original revision if restore to different bucket / path. - if (send_metadata) - revision_counter = revision - 1; - - auto object_attributes = *(source_object_storage->getObjectMetadata(key).attributes); - if (operation == rename) - { - auto from_path = object_attributes["from_path"]; - auto to_path = object_attributes["to_path"]; - if (disk->exists(from_path)) - { - disk->moveFile(from_path, to_path, send_metadata); - - LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); - - if (restore_information.detached && disk->isDirectory(to_path)) - { - /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. - if (!from_path.ends_with('/')) - from_path += '/'; - if (!to_path.ends_with('/')) - to_path += '/'; - - /// Always keep latest actual directory path to avoid 'detaching' not existing paths. 
- auto it = renames.find(from_path); - if (it != renames.end()) - renames.erase(it); - - renames.insert(to_path); - } - } - } - else if (operation == hardlink) - { - auto src_path = object_attributes["src_path"]; - auto dst_path = object_attributes["dst_path"]; - if (disk->exists(src_path)) - { - disk->createDirectories(directoryPath(dst_path)); - disk->createHardLink(src_path, dst_path, send_metadata); - LOG_TRACE(disk->log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); - } - } - } - - return true; - }; - - BlobsPathToSize children; - source_object_storage->listPrefix(restore_information.source_path + "operations/", children); - restore_file_operations(children); - - if (restore_information.detached) - { - Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; - - for (const auto & path : renames) - { - /// Skip already detached parts. - if (path.find("/detached/") != std::string::npos) - continue; - - /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. 
- fs::path directory_path(path); - auto directory_name = directory_path.parent_path().filename().string(); - - auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; - if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) - continue; - - auto detached_path = pathToDetached(path); - - LOG_TRACE(disk->log, "Move directory to 'detached' {} -> {}", path, detached_path); - - fs::path from_path = fs::path(path); - fs::path to_path = fs::path(detached_path); - if (path.ends_with('/')) - to_path /= from_path.parent_path().filename(); - else - to_path /= from_path.filename(); - - /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename - if (disk->metadata_disk->exists(to_path)) - disk->metadata_disk->removeRecursive(to_path); - - disk->createDirectories(directoryPath(to_path)); - disk->metadata_disk->moveDirectory(from_path, to_path); - } - } - - LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); -} - } diff --git a/src/Disks/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h similarity index 79% rename from src/Disks/DiskObjectStorage.h rename to src/Disks/ObjectStorages/DiskObjectStorage.h index 7ddd3fa6798..e7cbb04ff99 100644 --- a/src/Disks/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -1,7 +1,8 @@ #pragma once #include -#include +#include +#include #include namespace CurrentMetrics @@ -12,8 +13,6 @@ namespace CurrentMetrics namespace DB { -class DiskObjectStorageMetadataHelper; - class DiskObjectStorage : public IDisk { @@ -41,11 +40,11 @@ public: struct Metadata; using MetadataUpdater = std::function; - const String & getName() const final override { return name; } + const String & getName() const override { return name; } - const String & getPath() const final override { return metadata_disk->getPath(); } + const String & getPath() const override { return metadata_disk->getPath(); } 
- std::vector getRemotePaths(const String & local_path) const final override; + std::vector getRemotePaths(const String & local_path) const override; void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; @@ -277,59 +276,4 @@ private: CurrentMetrics::Increment metric_increment; }; -class DiskObjectStorageMetadataHelper -{ -public: - static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); - static constexpr UInt64 UNKNOWN_REVISION = 0; - - DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) - : disk(disk_) - , read_settings(std::move(read_settings_)) - { - } - - struct RestoreInformation - { - UInt64 revision = LATEST_REVISION; - String source_namespace; - String source_path; - bool detached = false; - }; - - using Futures = std::vector>; - - void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; - void findLastRevision(); - - static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); - void saveSchemaVersion(const int & version) const; - void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; - void migrateFileToRestorableSchema(const String & path) const; - void migrateToRestorableSchemaRecursive(const String & path, Futures & results); - void migrateToRestorableSchema(); - - void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); - void readRestoreInformation(RestoreInformation & restore_information); - void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; - void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - - std::atomic revision_counter = 0; - 
inline static const String RESTORE_FILE_NAME = "restore"; - - /// Object contains information about schema version. - inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; - /// Version with possibility to backup-restore metadata. - static constexpr int RESTORABLE_SCHEMA_VERSION = 1; - /// Directories with data. - const std::vector data_roots {"data", "store"}; - - DiskObjectStorage * disk; - - ObjectStoragePtr object_storage_from_another_namespace; - - ReadSettings read_settings; -}; - } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp new file mode 100644 index 00000000000..8e680663358 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -0,0 +1,541 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_FORMAT; + extern const int LOGICAL_ERROR; +} + +static String revisionToString(UInt64 revision) +{ + return std::bitset<64>(revision).to_string(); +} + +void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const +{ + const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); + buf->write('0'); + buf->finalize(); +} + +void DiskObjectStorageMetadataHelper::findLastRevision() +{ + /// Construct revision number from high to low bits. + String revision; + revision.reserve(64); + for (int bit = 0; bit < 64; ++bit) + { + auto revision_prefix = revision + "1"; + + LOG_TRACE(disk->log, "Check object exists with revision prefix {}", revision_prefix); + + /// Check file or operation with such revision prefix exists. 
+ if (disk->object_storage->exists(disk->remote_fs_root_path + "r" + revision_prefix) + || disk->object_storage->exists(disk->remote_fs_root_path + "operations/r" + revision_prefix)) + revision += "1"; + else + revision += "0"; + } + revision_counter = static_cast(std::bitset<64>(revision).to_ullong()); + LOG_INFO(disk->log, "Found last revision number {} for disk {}", revision_counter, disk->name); +} + +int DiskObjectStorageMetadataHelper::readSchemaVersion(IObjectStorage * object_storage, const String & source_path) +{ + const std::string path = source_path + SCHEMA_VERSION_OBJECT; + int version = 0; + if (!object_storage->exists(path)) + return version; + + auto buf = object_storage->readObject(path); + readIntText(version, *buf); + + return version; +} + +void DiskObjectStorageMetadataHelper::saveSchemaVersion(const int & version) const +{ + auto path = disk->remote_fs_root_path + SCHEMA_VERSION_OBJECT; + + auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite); + writeIntText(version, *buf); + buf->finalize(); + +} + +void DiskObjectStorageMetadataHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const +{ + disk->object_storage->copyObject(key, key, metadata); +} + +void DiskObjectStorageMetadataHelper::migrateFileToRestorableSchema(const String & path) const +{ + LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_disk->getPath() + path); + + auto meta = disk->readMetadata(path); + + for (const auto & [key, _] : meta.remote_fs_objects) + { + ObjectAttributes metadata { + {"path", path} + }; + updateObjectMetadata(disk->remote_fs_root_path + key, metadata); + } +} +void DiskObjectStorageMetadataHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) +{ + checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. 
+ + LOG_TRACE(disk->log, "Migrate directory {} to restorable schema", disk->metadata_disk->getPath() + path); + + bool dir_contains_only_files = true; + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + { + if (disk->isDirectory(it->path())) + { + dir_contains_only_files = false; + break; + } + } + + /// The whole directory can be migrated asynchronously. + if (dir_contains_only_files) + { + auto result = disk->getExecutor().execute([this, path] + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + migrateFileToRestorableSchema(it->path()); + }); + + results.push_back(std::move(result)); + } + else + { + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + if (!disk->isDirectory(it->path())) + { + auto source_path = it->path(); + auto result = disk->getExecutor().execute([this, source_path] + { + migrateFileToRestorableSchema(source_path); + }); + + results.push_back(std::move(result)); + } + else + migrateToRestorableSchemaRecursive(it->path(), results); + } + +} + +void DiskObjectStorageMetadataHelper::migrateToRestorableSchema() +{ + try + { + LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); + + Futures results; + + for (const auto & root : data_roots) + if (disk->exists(root)) + migrateToRestorableSchemaRecursive(root + '/', results); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to migrate to restorable schema for disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +{ + LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name); + + if (!disk->exists(RESTORE_FILE_NAME)) + { + LOG_INFO(disk->log, "No restore 
file '{}' exists, finishing restore", RESTORE_FILE_NAME); + return; + } + + try + { + RestoreInformation information; + information.source_path = disk->remote_fs_root_path; + information.source_namespace = disk->object_storage->getObjectsNamespace(); + + readRestoreInformation(information); + if (information.revision == 0) + information.revision = LATEST_REVISION; + if (!information.source_path.ends_with('/')) + information.source_path += '/'; + + IObjectStorage * source_object_storage = disk->object_storage.get(); + if (information.source_namespace == disk->object_storage->getObjectsNamespace()) + { + /// In this case we need to additionally cleanup S3 from objects with later revision. + /// Will be simply just restore to different path. + if (information.source_path == disk->remote_fs_root_path && information.revision != LATEST_REVISION) + throw Exception("Restoring to the same bucket and path is allowed if revision is latest (0)", ErrorCodes::BAD_ARGUMENTS); + + /// This case complicates S3 cleanup in case of unsuccessful restore. + if (information.source_path != disk->remote_fs_root_path && disk->remote_fs_root_path.starts_with(information.source_path)) + throw Exception("Restoring to the same bucket is allowed only if source path is not a sub-path of configured path in S3 disk", ErrorCodes::BAD_ARGUMENTS); + } + else + { + object_storage_from_another_namespace = disk->object_storage->cloneObjectStorage(information.source_namespace, config, config_prefix, context); + source_object_storage = object_storage_from_another_namespace.get(); + } + + LOG_INFO(disk->log, "Starting to restore disk {}. 
Revision: {}, Source path: {}", + disk->name, information.revision, information.source_path); + + if (readSchemaVersion(source_object_storage, information.source_path) < RESTORABLE_SCHEMA_VERSION) + throw Exception("Source bucket doesn't have restorable schema.", ErrorCodes::BAD_ARGUMENTS); + + LOG_INFO(disk->log, "Removing old metadata..."); + + bool cleanup_s3 = information.source_path != disk->remote_fs_root_path; + for (const auto & root : data_roots) + if (disk->exists(root)) + disk->removeSharedRecursive(root + '/', !cleanup_s3, {}); + + LOG_INFO(disk->log, "Old metadata removed, restoring new one"); + restoreFiles(source_object_storage, information); + restoreFileOperations(source_object_storage, information); + + disk->metadata_disk->removeFile(RESTORE_FILE_NAME); + + saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); + + LOG_INFO(disk->log, "Restore disk {} finished", disk->name); + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, fmt::format("Failed to restore disk {}", disk->name)); + + throw; + } +} + +void DiskObjectStorageMetadataHelper::readRestoreInformation(RestoreInformation & restore_information) /// NOLINT +{ + auto buffer = disk->metadata_disk->readFile(RESTORE_FILE_NAME, ReadSettings{}, 512); + buffer->next(); + + try + { + std::map properties; + + while (buffer->hasPendingData()) + { + String property; + readText(property, *buffer); + assertChar('\n', *buffer); + + auto pos = property.find('='); + if (pos == std::string::npos || pos == 0 || pos == property.length()) + throw Exception(fmt::format("Invalid property {} in restore file", property), ErrorCodes::UNKNOWN_FORMAT); + + auto key = property.substr(0, pos); + auto value = property.substr(pos + 1); + + auto it = properties.find(key); + if (it != properties.end()) + throw Exception(fmt::format("Property key duplication {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + + properties[key] = value; + } + + for (const auto & [key, value] : properties) + { + 
ReadBufferFromString value_buffer(value); + + if (key == "revision") + readIntText(restore_information.revision, value_buffer); + else if (key == "source_bucket" || key == "source_namespace") + readText(restore_information.source_namespace, value_buffer); + else if (key == "source_path") + readText(restore_information.source_path, value_buffer); + else if (key == "detached") + readBoolTextWord(restore_information.detached, value_buffer); + else + throw Exception(fmt::format("Unknown key {} in restore file", key), ErrorCodes::UNKNOWN_FORMAT); + } + } + catch (const Exception &) + { + tryLogCurrentException(disk->log, "Failed to read restore information"); + throw; + } +} + +static String shrinkKey(const String & path, const String & key) +{ + if (!key.starts_with(path)) + throw Exception("The key " + key + " prefix mismatch with given " + path, ErrorCodes::LOGICAL_ERROR); + + return key.substr(path.length()); +} + +static std::tuple extractRevisionAndOperationFromKey(const String & key) +{ + String revision_str; + String operation; + /// Key has format: ../../r{revision}-{operation} + static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + + re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + + return {(revision_str.empty() ? 0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; +} + +void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) +{ + LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); + + std::vector> results; + auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) + { + std::vector keys_names; + for (const auto & [key, size] : keys) + { + + LOG_INFO(disk->log, "Calling restore for key for disk {}", key); + + /// Skip file operations objects. They will be processed separately. 
+ if (key.find("/operations/") != String::npos) + continue; + + const auto [revision, _] = extractRevisionAndOperationFromKey(key); + /// Filter early if it's possible to get revision from key. + if (revision > restore_information.revision) + continue; + + keys_names.push_back(key); + } + + if (!keys_names.empty()) + { + auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() + { + processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); + }); + + results.push_back(std::move(result)); + } + + return true; + }; + + BlobsPathToSize children; + source_object_storage->listPrefix(restore_information.source_path, children); + + restore_files(children); + + for (auto & result : results) + result.wait(); + for (auto & result : results) + result.get(); + + LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); + +} + +void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const +{ + for (const auto & key : keys) + { + auto meta = source_object_storage->getObjectMetadata(key); + auto object_attributes = meta.attributes; + + String path; + if (object_attributes.has_value()) + { + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes->find("path"); + if (path_entry == object_attributes->end()) + { + /// Such keys can remain after migration, we can skip them. + LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); + continue; + } + + path = path_entry->second; + } + else + continue; + + + disk->createDirectories(directoryPath(path)); + auto relative_key = shrinkKey(source_path, key); + + /// Copy object if we restore to different bucket / path. 
+ if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != source_path) + source_object_storage->copyObjectToAnotherObjectStorage(key, disk->remote_fs_root_path + relative_key, *disk->object_storage); + + auto updater = [relative_key, meta] (DiskObjectStorage::Metadata & metadata) + { + metadata.addObject(relative_key, meta.size_bytes); + return true; + }; + + disk->createUpdateAndStoreMetadata(path, false, updater); + + LOG_TRACE(disk->log, "Restored file {}", path); + } + +} + +void DiskObjectStorage::onFreeze(const String & path) +{ + createDirectories(path); + auto revision_file_buf = metadata_disk->writeFile(path + "revision.txt", 32); + writeIntText(metadata_helper->revision_counter.load(), *revision_file_buf); + revision_file_buf->finalize(); +} + +static String pathToDetached(const String & source_path) +{ + if (source_path.ends_with('/')) + return fs::path(source_path).parent_path().parent_path() / "detached/"; + return fs::path(source_path).parent_path() / "detached/"; +} + +void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) +{ + /// Enable recording file operations if we restore to different bucket / path. 
+ bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; + + std::set renames; + auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + { + const String rename = "rename"; + const String hardlink = "hardlink"; + + for (const auto & [key, _]: keys) + { + const auto [revision, operation] = extractRevisionAndOperationFromKey(key); + if (revision == UNKNOWN_REVISION) + { + LOG_WARNING(disk->log, "Skip key {} with unknown revision", key); + continue; + } + + /// S3 ensures that keys will be listed in ascending UTF-8 bytes order (revision order). + /// We can stop processing if revision of the object is already more than required. + if (revision > restore_information.revision) + return false; + + /// Keep original revision if restore to different bucket / path. + if (send_metadata) + revision_counter = revision - 1; + + auto object_attributes = *(source_object_storage->getObjectMetadata(key).attributes); + if (operation == rename) + { + auto from_path = object_attributes["from_path"]; + auto to_path = object_attributes["to_path"]; + if (disk->exists(from_path)) + { + disk->moveFile(from_path, to_path, send_metadata); + + LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path); + + if (restore_information.detached && disk->isDirectory(to_path)) + { + /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way. + if (!from_path.ends_with('/')) + from_path += '/'; + if (!to_path.ends_with('/')) + to_path += '/'; + + /// Always keep latest actual directory path to avoid 'detaching' not existing paths. 
+ auto it = renames.find(from_path); + if (it != renames.end()) + renames.erase(it); + + renames.insert(to_path); + } + } + } + else if (operation == hardlink) + { + auto src_path = object_attributes["src_path"]; + auto dst_path = object_attributes["dst_path"]; + if (disk->exists(src_path)) + { + disk->createDirectories(directoryPath(dst_path)); + disk->createHardLink(src_path, dst_path, send_metadata); + LOG_TRACE(disk->log, "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path); + } + } + } + + return true; + }; + + BlobsPathToSize children; + source_object_storage->listPrefix(restore_information.source_path + "operations/", children); + restore_file_operations(children); + + if (restore_information.detached) + { + Strings not_finished_prefixes{"tmp_", "delete_tmp_", "attaching_", "deleting_"}; + + for (const auto & path : renames) + { + /// Skip already detached parts. + if (path.find("/detached/") != std::string::npos) + continue; + + /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them. 
+ fs::path directory_path(path); + auto directory_name = directory_path.parent_path().filename().string(); + + auto predicate = [&directory_name](String & prefix) { return directory_name.starts_with(prefix); }; + if (std::any_of(not_finished_prefixes.begin(), not_finished_prefixes.end(), predicate)) + continue; + + auto detached_path = pathToDetached(path); + + LOG_TRACE(disk->log, "Move directory to 'detached' {} -> {}", path, detached_path); + + fs::path from_path = fs::path(path); + fs::path to_path = fs::path(detached_path); + if (path.ends_with('/')) + to_path /= from_path.parent_path().filename(); + else + to_path /= from_path.filename(); + + /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename + if (disk->metadata_disk->exists(to_path)) + disk->metadata_disk->removeRecursive(to_path); + + disk->createDirectories(directoryPath(to_path)); + disk->metadata_disk->moveDirectory(from_path, to_path); + } + } + + LOG_INFO(disk->log, "File operations restored for disk {}", disk->name); +} + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h new file mode 100644 index 00000000000..2d8775030e5 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -0,0 +1,77 @@ +#pragma once + +#include + +namespace DB +{ + +class DiskObjectStorage; + +/// Class implements storage of ObjectStorage metadata inside object storage itself, +/// so it's possible to recover from this remote information in case of local disk loss. +/// +/// This machanism can be enabled with `true` option inside +/// disk configuration. Implemented only for S3 and Azure Block storage. Other object storages +/// doesn't support metadata for blobs. +/// +/// FIXME: this class is very intrusive and use a lot of DiskObjectStorage internals. +/// FIXME: it's very complex and unreliable, need to implement something better. 
+class DiskObjectStorageMetadataHelper +{ +public: + static constexpr UInt64 LATEST_REVISION = std::numeric_limits::max(); + static constexpr UInt64 UNKNOWN_REVISION = 0; + + DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) + : disk(disk_) + , read_settings(std::move(read_settings_)) + { + } + + struct RestoreInformation + { + UInt64 revision = LATEST_REVISION; + String source_namespace; + String source_path; + bool detached = false; + }; + + using Futures = std::vector>; + + void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + + void findLastRevision(); + + static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); + void saveSchemaVersion(const int & version) const; + void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; + void migrateFileToRestorableSchema(const String & path) const; + void migrateToRestorableSchemaRecursive(const String & path, Futures & results); + void migrateToRestorableSchema(); + + /// Most important method, called on DiskObjectStorage startup + void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); + + void readRestoreInformation(RestoreInformation & restore_information); + void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); + void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; + void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); + + std::atomic revision_counter = 0; + inline static const String RESTORE_FILE_NAME = "restore"; + + /// Object contains information about schema version. + inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; + /// Version with possibility to backup-restore metadata. 
+ static constexpr int RESTORABLE_SCHEMA_VERSION = 1; + /// Directories with data. + const std::vector data_roots {"data", "store"}; + + DiskObjectStorage * disk; + + ObjectStoragePtr object_storage_from_another_namespace; + + ReadSettings read_settings; +}; + +} diff --git a/src/Disks/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp similarity index 98% rename from src/Disks/HDFSObjectStorage.cpp rename to src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 5a1a70f6a50..82dad1dece0 100644 --- a/src/Disks/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -1,9 +1,11 @@ -#include +#include #include #include + #include #include + #include #include #include diff --git a/src/Disks/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h similarity index 98% rename from src/Disks/HDFSObjectStorage.h rename to src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 3c1bac02ee3..a9a223a3d7e 100644 --- a/src/Disks/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -5,7 +5,7 @@ #if USE_HDFS #include -#include +#include #include #include #include diff --git a/src/Disks/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp similarity index 94% rename from src/Disks/HDFS/registerDiskHDFS.cpp rename to src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index f67f6fbb440..0041da6881d 100644 --- a/src/Disks/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Disks/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp similarity index 96% rename from src/Disks/IObjectStorage.cpp rename to src/Disks/ObjectStorages/IObjectStorage.cpp index 1997022d05c..3941ad8b6f6 100644 --- a/src/Disks/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ 
-20,7 +20,6 @@ ThreadPool & IObjectStorage::getThreadPoolWriter() return writer; } - std::string IObjectStorage::getCacheBasePath() const { return cache ? cache->getBasePath() : ""; diff --git a/src/Disks/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h similarity index 74% rename from src/Disks/IObjectStorage.h rename to src/Disks/ObjectStorages/IObjectStorage.h index e5eb08f145d..ebf3ba54324 100644 --- a/src/Disks/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -52,6 +52,9 @@ struct ObjectMetadata using FinalizeCallback = std::function; +/// Base class for all object storages which implement some subset of ordinary filesystem operations. +/// +/// Examples of object storages are S3, Azure Blob Storage, HDFS. class IObjectStorage { public: @@ -59,18 +62,24 @@ public: : cache(std::move(cache_)) {} + /// Path exists or not virtual bool exists(const std::string & path) const = 0; + /// List on prefix, return childs with their sizes. virtual void listPrefix(const std::string & path, BlobsPathToSize & children) const = 0; + /// Get object metadata if supported. It should be possible to receive + /// at least size of object virtual ObjectMetadata getObjectMetadata(const std::string & path) const = 0; + /// Read single path from object storage, don't use cache virtual std::unique_ptr readObject( /// NOLINT const std::string & path, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; + /// Read multiple objects with common prefix, use cache virtual std::unique_ptr readObjects( /// NOLINT const std::string & common_path_prefix, const BlobsPathToSize & blobs_to_read, @@ -87,21 +96,28 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) = 0; - /// Remove file. Throws exception if file doesn't exists or it's a directory. + /// Remove object. Throws exception if object doesn't exists. 
virtual void removeObject(const std::string & path) = 0; + /// Remove multiple objects. Some object storages can do batch remove in a more + /// optimal way. virtual void removeObjects(const std::vector & paths) = 0; - /// Remove file if it exists. + /// Remove object on path if exists virtual void removeObjectIfExists(const std::string & path) = 0; + /// Remove objects on path if exists virtual void removeObjectsIfExist(const std::vector & paths) = 0; + /// Copy object with different attributes if required virtual void copyObject( /// NOLINT const std::string & object_from, const std::string & object_to, std::optional object_to_attributes = {}) = 0; + /// Copy object to another instance of object storage + /// by default just read the object from source object storage and write + /// to destination through buffers. virtual void copyObjectToAnotherObjectStorage( /// NOLINT const std::string & object_from, const std::string & object_to, @@ -110,6 +126,7 @@ public: virtual ~IObjectStorage() = default; + /// Path to directory with objects cache std::string getCacheBasePath() const; static AsynchronousReaderPtr getThreadPoolReader(); @@ -122,10 +139,15 @@ public: void removeFromCache(const std::string & path); + /// Apply new settings, in most cases reiniatilize client and some other staff virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + /// Sometimes object storages have something similar to chroot or namespace, for example + /// buckets in S3. If object storage doesn't have any namepaces return empty string. virtual String getObjectsNamespace() const = 0; + /// FIXME: confusing function required for a very specific case. Create new instance of object storage + /// in different namespace. 
virtual std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; protected: diff --git a/src/Disks/S3/ProxyConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyConfiguration.h similarity index 100% rename from src/Disks/S3/ProxyConfiguration.h rename to src/Disks/ObjectStorages/S3/ProxyConfiguration.h diff --git a/src/Disks/S3/ProxyListConfiguration.cpp b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp similarity index 100% rename from src/Disks/S3/ProxyListConfiguration.cpp rename to src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp diff --git a/src/Disks/S3/ProxyListConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h similarity index 100% rename from src/Disks/S3/ProxyListConfiguration.h rename to src/Disks/ObjectStorages/S3/ProxyListConfiguration.h diff --git a/src/Disks/S3/ProxyResolverConfiguration.cpp b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp similarity index 100% rename from src/Disks/S3/ProxyResolverConfiguration.cpp rename to src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp diff --git a/src/Disks/S3/ProxyResolverConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h similarity index 100% rename from src/Disks/S3/ProxyResolverConfiguration.h rename to src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h diff --git a/src/Disks/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp similarity index 99% rename from src/Disks/S3ObjectStorage.cpp rename to src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 9c33553c6b0..5c8287e5d84 100644 --- a/src/Disks/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -1,5 +1,4 @@ -#include - +#include #if USE_AWS_S3 @@ -13,8 +12,7 @@ #include #include #include -#include - +#include #include #include diff --git a/src/Disks/S3ObjectStorage.h 
b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h similarity index 99% rename from src/Disks/S3ObjectStorage.h rename to src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 12a8930c596..dc939e5e9dd 100644 --- a/src/Disks/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -4,7 +4,7 @@ #if USE_AWS_S3 -#include +#include #include #include #include diff --git a/src/Disks/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp similarity index 99% rename from src/Disks/S3/diskSettings.cpp rename to src/Disks/ObjectStorages/S3/diskSettings.cpp index 9f170799bb9..d54ef02ec7c 100644 --- a/src/Disks/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/Disks/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h similarity index 74% rename from src/Disks/S3/diskSettings.h rename to src/Disks/ObjectStorages/S3/diskSettings.h index d8784d0b5a5..fc5a09ce825 100644 --- a/src/Disks/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -6,12 +6,12 @@ #include #include -#include +#include #include #include -#include -#include -#include +#include +#include +#include #include #include #include diff --git a/src/Disks/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h similarity index 100% rename from src/Disks/S3/parseConfig.h rename to src/Disks/ObjectStorages/S3/parseConfig.h diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp similarity index 91% rename from src/Disks/S3/registerDiskS3.cpp rename to src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 5da49be12e4..fe3aa78b0b3 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -4,22 +4,23 @@ #include #include #include -#include "Disks/DiskFactory.h" +#include #if USE_AWS_S3 #include #include -#include +#include +#include #include #include -#include -#include -#include +#include +#include +#include 
#include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d862bd4dd5c..20aa6dbb6c7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include From 0299cc87e4478bf2bdef1898e743f94dcc4ed522 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 13 May 2022 10:52:25 +0200 Subject: [PATCH 374/615] Improve naming consistency of string search code Just renamings, nothing major ... --- src/Common/OptimizedRegularExpression.h | 1 + src/Functions/CountSubstringsImpl.h | 32 ++-- src/Functions/FunctionsStringSearch.h | 23 ++- src/Functions/FunctionsVisitParam.h | 34 ++--- src/Functions/HasTokenImpl.h | 26 ++-- src/Functions/MatchImpl.h | 185 ++++++++++++----------- src/Functions/MultiMatchAllIndicesImpl.h | 2 - src/Functions/MultiMatchAnyImpl.h | 2 +- src/Functions/PositionImpl.h | 18 +-- src/Functions/Regexps.h | 4 +- src/Functions/ilike.cpp | 2 +- src/Functions/like.h | 2 +- src/Functions/match.cpp | 2 +- src/Functions/notILike.cpp | 2 +- src/Functions/notLike.cpp | 2 +- 15 files changed, 178 insertions(+), 159 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index 53f3a7d34b1..a3d38f27c07 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -107,3 +107,4 @@ private: }; using OptimizedRegularExpression = OptimizedRegularExpressionImpl; +using OptimizedRegularExpressionSingleThreaded = OptimizedRegularExpressionImpl; diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index fc6e4a0e671..6a5cab911ee 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -30,15 +30,15 @@ struct CountSubstringsImpl /// Count occurrences of one substring in many strings. 
static void vectorConstant( - const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, const std::string & needle, const ColumnPtr & start_pos, PaddedPODArray & res) { - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); /// FIXME: suboptimal memset(&res[0], 0, res.size() * sizeof(res[0])); @@ -52,15 +52,15 @@ struct CountSubstringsImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Determine which index it refers to. - while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) ++i; auto start = start_pos != nullptr ? start_pos->getUInt(i) : 0; /// We check that the entry does not pass through the boundaries of strings. - if (pos + needle.size() < begin + offsets[i]) + if (pos + needle.size() < begin + haystack_offsets[i]) { - auto res_pos = needle.size() + Impl::countChars(reinterpret_cast(begin + offsets[i - 1]), reinterpret_cast(pos)); + auto res_pos = needle.size() + Impl::countChars(reinterpret_cast(begin + haystack_offsets[i - 1]), reinterpret_cast(pos)); if (res_pos >= start) { ++res[i]; @@ -69,14 +69,14 @@ struct CountSubstringsImpl pos += needle.size(); continue; } - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; ++i; } } /// Count number of occurrences of substring in string. 
static void constantConstantScalar( - std::string data, + std::string haystack, std::string needle, UInt64 start_pos, UInt64 & res) @@ -87,9 +87,9 @@ struct CountSubstringsImpl return; auto start = std::max(start_pos, UInt64(1)); - size_t start_byte = Impl::advancePos(data.data(), data.data() + data.size(), start - 1) - data.data(); + size_t start_byte = Impl::advancePos(haystack.data(), haystack.data() + haystack.size(), start - 1) - haystack.data(); size_t new_start_byte; - while ((new_start_byte = data.find(needle, start_byte)) != std::string::npos) + while ((new_start_byte = haystack.find(needle, start_byte)) != std::string::npos) { ++res; /// Intersecting substrings in haystack accounted only once @@ -99,21 +99,21 @@ struct CountSubstringsImpl /// Count number of occurrences of substring in string starting from different positions. static void constantConstant( - std::string data, + std::string haystack, std::string needle, const ColumnPtr & start_pos, PaddedPODArray & res) { - Impl::toLowerIfNeed(data); + Impl::toLowerIfNeed(haystack); Impl::toLowerIfNeed(needle); if (start_pos == nullptr) { - constantConstantScalar(data, needle, 0, res[0]); + constantConstantScalar(haystack, needle, 0, res[0]); return; } - size_t haystack_size = Impl::countChars(data.data(), data.data() + data.size()); + size_t haystack_size = Impl::countChars(haystack.data(), haystack.data() + haystack.size()); size_t size = start_pos != nullptr ? 
start_pos->size() : 0; for (size_t i = 0; i < size; ++i) @@ -125,7 +125,7 @@ struct CountSubstringsImpl res[i] = 0; continue; } - constantConstantScalar(data, needle, start, res[i]); + constantConstantScalar(haystack, needle, start, res[i]); } } diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 4aa76ceec28..44219359919 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -15,7 +15,6 @@ namespace DB { /** Search and replace functions in strings: - * * position(haystack, needle) - the normal search for a substring in a string, returns the position (in bytes) of the found substring starting with 1, or 0 if no substring is found. * positionUTF8(haystack, needle) - the same, but the position is calculated at code points, provided that the string is encoded in UTF-8. * positionCaseInsensitive(haystack, needle) @@ -24,13 +23,29 @@ namespace DB * like(haystack, pattern) - search by the regular expression LIKE; Returns 0 or 1. Case-insensitive, but only for Latin. * notLike(haystack, pattern) * + * ilike(haystack, pattern) - like 'like' but case-insensitive + * notIlike(haystack, pattern) + * * match(haystack, pattern) - search by regular expression re2; Returns 0 or 1. - * multiMatchAny(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns 0 or 1 if any pattern_i matches. - * multiMatchAnyIndex(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns index of any match or zero if none; - * multiMatchAllIndices(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns an array of matched indices in any order; * * countSubstrings(haystack, needle) -- count number of occurrences of needle in haystack. 
* countSubstringsCaseInsensitive(haystack, needle) + * countSubstringsCaseInsensitiveUTF8(haystack, needle) + * + * hasToken() + * hasTokenCaseInsensitive() + * + * JSON stuff: + * visitParamExtractBool() + * simpleJSONExtractBool() + * visitParamExtractFloat() + * simpleJSONExtractFloat() + * visitParamExtractInt() + * simpleJSONExtractInt() + * visitParamExtractUInt() + * simpleJSONExtractUInt() + * visitParamHas() + * simpleJSONHas() * * Applies regexp re2 and pulls: * - the first subpattern, if the regexp has a subpattern; diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 09fcf8659ed..d9a155222c4 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -85,8 +85,8 @@ struct ExtractParamImpl /// It is assumed that `res` is the correct size and initialized with zeros. static void vectorConstant( - const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, std::string needle, const ColumnPtr & start_pos, PaddedPODArray & res) @@ -97,9 +97,9 @@ struct ExtractParamImpl /// We are looking for a parameter simply as a substring of the form "name" needle = "\"" + needle + "\":"; - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); /// The current index in the string array. size_t i = 0; @@ -110,19 +110,19 @@ struct ExtractParamImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Let's determine which index it belongs to. - while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) { res[i] = 0; ++i; } /// We check that the entry does not pass through the boundaries of strings. 
- if (pos + needle.size() < begin + offsets[i]) - res[i] = ParamExtractor::extract(pos + needle.size(), begin + offsets[i] - 1); /// don't include terminating zero + if (pos + needle.size() < begin + haystack_offsets[i]) + res[i] = ParamExtractor::extract(pos + needle.size(), begin + haystack_offsets[i] - 1); /// don't include terminating zero else res[i] = 0; - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; ++i; } @@ -153,20 +153,20 @@ struct ExtractParamImpl template struct ExtractParamToStringImpl { - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, + static void vector(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, std::string needle, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { /// Constant 5 is taken from a function that performs a similar task FunctionsStringSearch.h::ExtractImpl - res_data.reserve(data.size() / 5); - res_offsets.resize(offsets.size()); + res_data.reserve(haystack_data.size() / 5); + res_offsets.resize(haystack_offsets.size()); /// We are looking for a parameter simply as a substring of the form "name" needle = "\"" + needle + "\":"; - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); /// The current index in the string array. size_t i = 0; @@ -177,7 +177,7 @@ struct ExtractParamToStringImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Determine which index it belongs to. - while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) { res_data.push_back(0); res_offsets[i] = res_data.size(); @@ -185,10 +185,10 @@ struct ExtractParamToStringImpl } /// We check that the entry does not pass through the boundaries of strings. 
- if (pos + needle.size() < begin + offsets[i]) - ParamExtractor::extract(pos + needle.size(), begin + offsets[i], res_data); + if (pos + needle.size() < begin + haystack_offsets[i]) + ParamExtractor::extract(pos + needle.size(), begin + haystack_offsets[i], res_data); - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; res_data.push_back(0); res_offsets[i] = res_data.size(); diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index ec33a07fce3..03826ce7057 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -14,7 +14,7 @@ namespace ErrorCodes /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. */ -template +template struct HasTokenImpl { using ResultType = UInt8; @@ -24,8 +24,8 @@ struct HasTokenImpl static constexpr auto name = Name::name; static void vectorConstant( - const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, const std::string & pattern, const ColumnPtr & start_pos, PaddedPODArray & res) @@ -33,12 +33,12 @@ struct HasTokenImpl if (start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); - if (offsets.empty()) + if (haystack_offsets.empty()) return; - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); /// The current index in the array of strings. size_t i = 0; @@ -49,25 +49,25 @@ struct HasTokenImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Let's determine which index it refers to. 
- while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) { - res[i] = negate_result; + res[i] = negate; ++i; } /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + offsets[i]) - res[i] = !negate_result; + if (pos + pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; else - res[i] = negate_result; + res[i] = negate; - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; ++i; } /// Tail, in which there can be no substring. if (i < res.size()) - memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0])); + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } template diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 026b38b997b..21cdbea408c 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -20,21 +20,19 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace impl { /// Is the [I]LIKE expression reduced to finding a substring in a string? -static inline bool likePatternIsStrstr(const String & pattern, String & res) +inline bool likePatternIsSubstring(const String & pattern, String & res) { if (pattern.size() < 2 || pattern.front() != '%' || pattern.back() != '%') return false; - res = ""; + res.clear(); res.reserve(pattern.size() - 2); - const char * pos = pattern.data(); - const char * end = pos + pattern.size(); - - ++pos; - --end; + const char * pos = pattern.data() + 1; + const char * const end = pattern.data() + pattern.size() - 1; while (pos < end) { @@ -60,11 +58,16 @@ static inline bool likePatternIsStrstr(const String & pattern, String & res) return true; } -/** 'like' - if true, treat pattern as SQL LIKE or ILIKE; if false - treat pattern as re2 regexp. +} + +/** 'like' - if true, treat pattern as SQL LIKE, otherwise as re2 regexp. 
+ * 'negate' - if true, negate result + * 'case_insensitive' - if true, match case insensitively + * * NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. */ -template +template struct MatchImpl { static constexpr bool use_default_implementation_for_constants = true; @@ -78,9 +81,9 @@ struct MatchImpl VolnitskyUTF8>; static void vectorConstant( - const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, - const String & pattern, + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const String & needle, const ColumnPtr & start_pos, PaddedPODArray & res) { @@ -88,15 +91,15 @@ struct MatchImpl throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); - if (offsets.empty()) + if (haystack_offsets.empty()) return; /// A simple case where the [I]LIKE expression reduces to finding a substring in a string String strstr_pattern; - if (like && likePatternIsStrstr(pattern, strstr_pattern)) + if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) { - const UInt8 * const begin = data.data(); - const UInt8 * const end = data.data() + data.size(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; /// The current index in the array of strings. @@ -109,31 +112,29 @@ struct MatchImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Let's determine which index it refers to. - while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) { - res[i] = revert; + res[i] = negate; ++i; } /// We check that the entry does not pass through the boundaries of strings. 
- if (pos + strstr_pattern.size() < begin + offsets[i]) - res[i] = !revert; + if (pos + strstr_pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; else - res[i] = revert; + res[i] = negate; - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; ++i; } /// Tail, in which there can be no substring. if (i < res.size()) - memset(&res[i], revert, (res.size() - i) * sizeof(res[0])); + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } else { - size_t size = offsets.size(); - - auto regexp = Regexps::get(pattern); + auto regexp = Regexps::get(needle); String required_substring; bool is_trivial; @@ -141,37 +142,39 @@ struct MatchImpl regexp->getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); + size_t haystack_size = haystack_offsets.size(); + if (required_substring.empty()) { if (!regexp->getRE2()) /// An empty regexp. Always matches. { - if (size) - memset(res.data(), 1, size * sizeof(res[0])); + if (haystack_size) + memset(res.data(), 1, haystack_size * sizeof(res[0])); } else { size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < haystack_size; ++i) { - res[i] = revert + res[i] = negate ^ regexp->getRE2()->Match( - re2_st::StringPiece(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1), + {reinterpret_cast(&haystack_data[prev_offset]), haystack_offsets[i] - prev_offset - 1}, 0, - offsets[i] - prev_offset - 1, + haystack_offsets[i] - prev_offset - 1, re2_st::RE2::UNANCHORED, nullptr, 0); - prev_offset = offsets[i]; + prev_offset = haystack_offsets[i]; } } } else { - /// NOTE This almost matches with the case of LikePatternIsStrstr. + /// NOTE This almost matches with the case of impl::likePatternIsSubstring. 
- const UInt8 * const begin = data.data(); - const UInt8 * const end = data.begin() + data.size(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.begin() + haystack_data.size(); const UInt8 * pos = begin; /// The current index in the array of strings. @@ -183,23 +186,23 @@ struct MatchImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Determine which index it refers to. - while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) { - res[i] = revert; + res[i] = negate; ++i; } /// We check that the entry does not pass through the boundaries of strings. - if (pos + required_substring.size() < begin + offsets[i]) + if (pos + required_substring.size() < begin + haystack_offsets[i]) { /// And if it does not, if necessary, we check the regexp. if (is_trivial) - res[i] = !revert; + res[i] = !negate; else { - const char * str_data = reinterpret_cast(&data[offsets[i - 1]]); - size_t str_size = offsets[i] - offsets[i - 1] - 1; + const char * str_data = reinterpret_cast(&haystack_data[haystack_offsets[i - 1]]); + size_t str_size = haystack_offsets[i] - haystack_offsets[i - 1] - 1; /** Even in the case of `required_substring_is_prefix` use UNANCHORED check for regexp, * so that it can match when `required_substring` occurs into the string several times, @@ -207,55 +210,57 @@ struct MatchImpl */ if (required_substring_is_prefix) - res[i] = revert + res[i] = negate ^ regexp->getRE2()->Match( - re2_st::StringPiece(str_data, str_size), + {str_data, str_size}, reinterpret_cast(pos) - str_data, str_size, re2_st::RE2::UNANCHORED, nullptr, 0); else - res[i] = revert + res[i] = negate ^ regexp->getRE2()->Match( - re2_st::StringPiece(str_data, str_size), 0, str_size, re2_st::RE2::UNANCHORED, nullptr, 0); + {str_data, str_size}, 0, str_size, re2_st::RE2::UNANCHORED, nullptr, 0); } } else - res[i] = revert; + res[i] = negate; - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; ++i; 
} /// Tail, in which there can be no substring. if (i < res.size()) - memset(&res[i], revert, (res.size() - i) * sizeof(res[0])); + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } } } /// Very carefully crafted copy-paste. static void vectorFixedConstant( - const ColumnString::Chars & data, size_t n, const String & pattern, + const ColumnString::Chars & haystack, + size_t N, + const String & needle, PaddedPODArray & res) { - if (data.empty()) + if (haystack.empty()) return; /// A simple case where the LIKE expression reduces to finding a substring in a string String strstr_pattern; - if (like && likePatternIsStrstr(pattern, strstr_pattern)) + if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) { - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack.data(); + const UInt8 * const end = haystack.data() + haystack.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); size_t i = 0; const UInt8 * next_pos = begin; - /// If pattern is larger than string size - it cannot be found. - if (strstr_pattern.size() <= n) + /// If needle is larger than string size - it cannot be found. + if (strstr_pattern.size() <= N) { Searcher searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); @@ -263,19 +268,19 @@ struct MatchImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Let's determine which index it refers to. - while (next_pos + n <= pos) + while (next_pos + N <= pos) { - res[i] = revert; - next_pos += n; + res[i] = negate; + next_pos += N; ++i; } - next_pos += n; + next_pos += N; /// We check that the entry does not pass through the boundaries of strings. if (pos + strstr_pattern.size() <= next_pos) - res[i] = !revert; + res[i] = !negate; else - res[i] = revert; + res[i] = negate; pos = next_pos; ++i; @@ -284,13 +289,11 @@ struct MatchImpl /// Tail, in which there can be no substring. 
if (i < res.size()) - memset(&res[i], revert, (res.size() - i) * sizeof(res[0])); + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } else { - size_t size = data.size() / n; - - auto regexp = Regexps::get(pattern); + auto regexp = Regexps::get(needle); String required_substring; bool is_trivial; @@ -298,44 +301,46 @@ struct MatchImpl regexp->getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); + const size_t haystack_size = haystack.size() / N; + if (required_substring.empty()) { if (!regexp->getRE2()) /// An empty regexp. Always matches. { - if (size) - memset(res.data(), 1, size * sizeof(res[0])); + if (haystack_size) + memset(res.data(), 1, haystack_size * sizeof(res[0])); } else { size_t offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < haystack_size; ++i) { - res[i] = revert + res[i] = negate ^ regexp->getRE2()->Match( - re2_st::StringPiece(reinterpret_cast(&data[offset]), n), + {reinterpret_cast(&haystack[offset]), N}, 0, - n, + N, re2_st::RE2::UNANCHORED, nullptr, 0); - offset += n; + offset += N; } } } else { - /// NOTE This almost matches with the case of LikePatternIsStrstr. + /// NOTE This almost matches with the case of likePatternIsSubstring. - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack.data(); + const UInt8 * const end = haystack.data() + haystack.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); size_t i = 0; const UInt8 * next_pos = begin; /// If required substring is larger than string size - it cannot be found. - if (required_substring.size() <= n) + if (required_substring.size() <= N) { Searcher searcher(required_substring.data(), required_substring.size(), end - pos); @@ -343,23 +348,23 @@ struct MatchImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Let's determine which index it refers to. 
- while (next_pos + n <= pos) + while (next_pos + N <= pos) { - res[i] = revert; - next_pos += n; + res[i] = negate; + next_pos += N; ++i; } - next_pos += n; + next_pos += N; if (pos + required_substring.size() <= next_pos) { /// And if it does not, if necessary, we check the regexp. if (is_trivial) - res[i] = !revert; + res[i] = !negate; else { - const char * str_data = reinterpret_cast(next_pos - n); + const char * str_data = reinterpret_cast(next_pos - N); /** Even in the case of `required_substring_is_prefix` use UNANCHORED check for regexp, * so that it can match when `required_substring` occurs into the string several times, @@ -367,22 +372,22 @@ struct MatchImpl */ if (required_substring_is_prefix) - res[i] = revert + res[i] = negate ^ regexp->getRE2()->Match( - re2_st::StringPiece(str_data, n), + {str_data, N}, reinterpret_cast(pos) - str_data, - n, + N, re2_st::RE2::UNANCHORED, nullptr, 0); else - res[i] = revert + res[i] = negate ^ regexp->getRE2()->Match( - re2_st::StringPiece(str_data, n), 0, n, re2_st::RE2::UNANCHORED, nullptr, 0); + {str_data, N}, 0, N, re2_st::RE2::UNANCHORED, nullptr, 0); } } else - res[i] = revert; + res[i] = negate; pos = next_pos; ++i; @@ -391,7 +396,7 @@ struct MatchImpl /// Tail, in which there can be no substring. 
if (i < res.size()) - memset(&res[i], revert, (res.size() - i) * sizeof(res[0])); + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } } } diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index f3e67008707..adf9e9b585f 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -11,8 +11,6 @@ #if USE_HYPERSCAN # include -#else -# include "MatchImpl.h" #endif diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 747c0e5e62c..595a3c8de5b 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -120,7 +120,7 @@ struct MultiMatchAnyImpl memset(accum.data(), 0, accum.size()); for (size_t j = 0; j < needles.size(); ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index d3b6d74c3cd..ea2fc6970e7 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -186,15 +186,15 @@ struct PositionImpl /// Find one substring in many strings. static void vectorConstant( - const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, const std::string & needle, const ColumnPtr & start_pos, PaddedPODArray & res) { - const UInt8 * begin = data.data(); + const UInt8 * const begin = haystack_data.data(); + const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); /// Current index in the array of strings. 
size_t i = 0; @@ -205,7 +205,7 @@ struct PositionImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// Determine which index it refers to. - while (begin + offsets[i] <= pos) + while (begin + haystack_offsets[i] <= pos) { res[i] = 0; ++i; @@ -213,14 +213,14 @@ struct PositionImpl auto start = start_pos != nullptr ? start_pos->getUInt(i) : 0; /// We check that the entry does not pass through the boundaries of strings. - if (pos + needle.size() < begin + offsets[i]) + if (pos + needle.size() < begin + haystack_offsets[i]) { - auto res_pos = 1 + Impl::countChars(reinterpret_cast(begin + offsets[i - 1]), reinterpret_cast(pos)); + auto res_pos = 1 + Impl::countChars(reinterpret_cast(begin + haystack_offsets[i - 1]), reinterpret_cast(pos)); if (res_pos < start) { pos = reinterpret_cast(Impl::advancePos( reinterpret_cast(pos), - reinterpret_cast(begin + offsets[i]), + reinterpret_cast(begin + haystack_offsets[i]), start - res_pos)); continue; } @@ -230,7 +230,7 @@ struct PositionImpl { res[i] = 0; } - pos = begin + offsets[i]; + pos = begin + haystack_offsets[i]; ++i; } diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 9a1938a3f32..5c38583934a 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -38,7 +38,7 @@ namespace ErrorCodes namespace Regexps { - using Regexp = OptimizedRegularExpressionImpl; + using Regexp = OptimizedRegularExpressionSingleThreaded; using Pool = ObjectPoolMap; template @@ -68,7 +68,7 @@ namespace Regexps flags |= OptimizedRegularExpression::RE_NO_CAPTURE; if (case_insensitive) - flags |= Regexps::Regexp::RE_CASELESS; + flags |= OptimizedRegularExpression::RE_CASELESS; ProfileEvents::increment(ProfileEvents::RegexpCreated); return new Regexp{createRegexp(pattern, flags)}; diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp index 116c945e04f..1222cc48d07 100644 --- a/src/Functions/ilike.cpp +++ b/src/Functions/ilike.cpp @@ -12,7 +12,7 @@ struct NameILike static constexpr auto 
name = "ilike"; }; -using ILikeImpl = MatchImpl; +using ILikeImpl = MatchImpl; using FunctionILike = FunctionsStringSearch; } diff --git a/src/Functions/like.h b/src/Functions/like.h index a00891ec64c..edb738d393b 100644 --- a/src/Functions/like.h +++ b/src/Functions/like.h @@ -11,7 +11,7 @@ struct NameLike static constexpr auto name = "like"; }; -using LikeImpl = MatchImpl; +using LikeImpl = MatchImpl; using FunctionLike = FunctionsStringSearch; } diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index 69dc1a3d99a..4c329701464 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -13,7 +13,7 @@ struct NameMatch static constexpr auto name = "match"; }; -using FunctionMatch = FunctionsStringSearch>; +using FunctionMatch = FunctionsStringSearch>; } diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp index be40e2b989e..b5e06ac55f4 100644 --- a/src/Functions/notILike.cpp +++ b/src/Functions/notILike.cpp @@ -12,7 +12,7 @@ struct NameNotILike static constexpr auto name = "notILike"; }; -using NotILikeImpl = MatchImpl; +using NotILikeImpl = MatchImpl; using FunctionNotILike = FunctionsStringSearch; } diff --git a/src/Functions/notLike.cpp b/src/Functions/notLike.cpp index 7c4ea8ab2dc..7fa1b6f9122 100644 --- a/src/Functions/notLike.cpp +++ b/src/Functions/notLike.cpp @@ -12,7 +12,7 @@ struct NameNotLike static constexpr auto name = "notLike"; }; -using FunctionNotLike = FunctionsStringSearch>; +using FunctionNotLike = FunctionsStringSearch>; } From 00d004c0616ccdb2717cba34492a13c7a68ee065 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 16 May 2022 09:59:13 +0200 Subject: [PATCH 375/615] Remove -T flag from instructions about test creation That flag no longer exists and test creation works fine w/o it (see commit f07d4177a5 for a related change). 
--- docs/en/development/adding_test_queries.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/adding_test_queries.md b/docs/en/development/adding_test_queries.md index 9b993a96ed5..ca47818dad7 100644 --- a/docs/en/development/adding_test_queries.md +++ b/docs/en/development/adding_test_queries.md @@ -106,7 +106,7 @@ vim tests/queries/0_stateless/01521_dummy_test.sql 4) run the test, and put the result of that into the reference file: ``` -clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee tests/queries/0_stateless/01521_dummy_test.reference +clickhouse-client -nm < tests/queries/0_stateless/01521_dummy_test.sql | tee tests/queries/0_stateless/01521_dummy_test.reference ``` 5) ensure everything is correct, if the test output is incorrect (due to some bug for example), adjust the reference file using text editor. From 4829ae8380831a581892349c67eb927042f981aa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 16 May 2022 22:23:51 +0200 Subject: [PATCH 376/615] Replace overly clever const argument logic by something simpler The previous logic was smart but too inflexible to support the next commits. Replace by a simple pushdown logic where string search implementations return their const arguments instead of having the common class figure these out based on properties/traits. 
--- src/Functions/CountSubstringsImpl.h | 2 ++ src/Functions/FunctionsStringSearch.h | 6 +----- src/Functions/FunctionsVisitParam.h | 2 ++ src/Functions/HasTokenImpl.h | 3 +++ src/Functions/MatchImpl.h | 3 +++ src/Functions/PositionImpl.h | 2 ++ 6 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 6a5cab911ee..c53d1a204e7 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -26,6 +26,8 @@ struct CountSubstringsImpl static constexpr bool supports_start_pos = true; static constexpr auto name = Name::name; + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};} + using ResultType = UInt64; /// Count occurrences of one substring in many strings. diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 44219359919..27b93fc0240 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -85,11 +85,7 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { - if (!Impl::use_default_implementation_for_constants) - return ColumnNumbers{}; - if (!Impl::supports_start_pos) - return ColumnNumbers{1, 2}; - return ColumnNumbers{1, 2, 3}; + return Impl::getArgumentsThatAreAlwaysConstant(); } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index d9a155222c4..c8ba625b835 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -83,6 +83,8 @@ struct ExtractParamImpl static constexpr bool supports_start_pos = false; static constexpr auto name = Name::name; + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2};} + /// It is assumed that `res` is the correct size and initialized with zeros. 
static void vectorConstant( const ColumnString::Chars & haystack_data, diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 03826ce7057..a6280b4d368 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -23,6 +24,8 @@ struct HasTokenImpl static constexpr bool supports_start_pos = false; static constexpr auto name = Name::name; + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2};} + static void vectorConstant( const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 21cdbea408c..4be0f955091 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "Regexps.h" #include "config_functions.h" @@ -74,6 +75,8 @@ struct MatchImpl static constexpr bool supports_start_pos = false; static constexpr auto name = Name::name; + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2};} + using ResultType = UInt8; using Searcher = std::conditional_t Date: Sun, 22 May 2022 20:33:07 +0300 Subject: [PATCH 377/615] Update 02306_window_move_row_number_fix.sql --- tests/queries/0_stateless/02306_window_move_row_number_fix.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql index 96dd8f6176b..5bc0c41b3ee 100644 --- a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -1 +1,2 @@ +-- Tags: no-backward-compatibility-check SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) From e7dc438a48cce5048b5aeb2f2b9106991131d4f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 May 2022 21:35:00 +0200 Subject: 
[PATCH 378/615] Fix scroll in benchmark page --- website/css/blog.css | 28 -- website/css/docs.css | 62 ----- website/css/docsearch.css | 555 -------------------------------------- website/css/highlight.css | 76 ------ website/css/main.css | 2 +- 5 files changed, 1 insertion(+), 722 deletions(-) diff --git a/website/css/blog.css b/website/css/blog.css index 089856b8e00..e69de29bb2d 100644 --- a/website/css/blog.css +++ b/website/css/blog.css @@ -1,28 +0,0 @@ -body.blog .dropdown-item { - color: #111 !important; -} - -body.blog .dropdown-item:hover, -body.blog .dropdown-item:focus { - background-color: #efefef; -} - -.blog .social-icon { - background: #eee; -} - -@media (prefers-color-scheme: dark) { - body.blog .dropdown-item { - color: #fff !important; - } - - .blog .dropdown-item:hover, - .blog .dropdown-item:focus, - .blog .tag{ - background-color: #666 !important; - } - - .blog .social-icon { - background: #444451; - } -} diff --git a/website/css/docs.css b/website/css/docs.css index 735559f384c..e7d41bc28bf 100644 --- a/website/css/docs.css +++ b/website/css/docs.css @@ -83,68 +83,6 @@ summary { line-height: 1.25; } -#docsearch-input:focus, #docsearch-input:active { - border: 0; - color: #efefef!important; -} - -@media (max-width: 768px) { - #search-form { - width: 50%; - } - .algolia-autocomplete, #docsearch-input, #search-form .input-group { - width: 100%; - } -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - display: none !important; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none !important; - width: 100% !important; - background-color: #444451; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content:before { - content: none !important; -} - -.algolia-autocomplete .ds-dropdown-menu { - max-height: 512px; - overflow-x: hidden; - overflow-y: auto; -} - -.algolia-autocomplete .ds-dropdown-menu, -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-], -.algolia-autocomplete 
.algolia-docsearch-suggestion, -.algolia-autocomplete .ds-dropdown-menu:before { - background: #1c1c1c !important; - border-color: #333; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - background-color: #333; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content:hover, -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion:not(.suggestion-layout-simple) .algolia-docsearch-suggestion--content { - background-color: #444451 !important; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header, -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column, -.algolia-autocomplete .algolia-docsearch-suggestion--title, -.algolia-autocomplete .algolia-docsearch-suggestion--text { - color: #efefef; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #f14600; -} - #toc .nav-link { color: #333; } diff --git a/website/css/docsearch.css b/website/css/docsearch.css index 1e7ffb30512..e69de29bb2d 100644 --- a/website/css/docsearch.css +++ b/website/css/docsearch.css @@ -1,555 +0,0 @@ -.searchbox { - display: inline-block; - position: relative; - width: 200px; - height: 32px !important; - white-space: nowrap; - box-sizing: border-box; - visibility: visible !important; -} - -.searchbox .algolia-autocomplete { - display: block; - width: 100%; - height: 100%; -} - -.searchbox__wrapper { - width: 100%; - height: 100%; - z-index: 999; - position: relative; -} - -.searchbox__input { - display: inline-block; - box-sizing: border-box; - border: 0; - border-radius: 16px; - box-shadow: inset 0 0 0 1px #cccccc; - background: #ffffff !important; - padding: 0; - padding-right: 26px; - padding-left: 32px; - width: 100%; - height: 100%; - vertical-align: middle; - white-space: normal; - font-size: 12px; - -webkit-appearance: none; - -moz-appearance: none; - appearance: none; -} - -.searchbox__input::-webkit-search-decoration, 
.searchbox__input::-webkit-search-cancel-button, .searchbox__input::-webkit-search-results-button, .searchbox__input::-webkit-search-results-decoration { - display: none; -} - -.searchbox__input:hover { - box-shadow: inset 0 0 0 1px #b3b3b3; -} - -.searchbox__input:focus, .searchbox__input:active { - outline: 0; - box-shadow: inset 0 0 0 1px #aaaaaa; - background: #ffffff; -} - -.searchbox__input::-webkit-input-placeholder { - color: #aaaaaa; -} - -.searchbox__input:-ms-input-placeholder { - color: #aaaaaa; -} - -.searchbox__input::-ms-input-placeholder { - color: #aaaaaa; -} - -.searchbox__input::placeholder { - color: #aaaaaa; -} - -.searchbox__submit { - position: absolute; - top: 0; - margin: 0; - border: 0; - border-radius: 16px 0 0 16px; - background-color: rgba(69, 142, 225, 0); - padding: 0; - width: 32px; - height: 100%; - vertical-align: middle; - text-align: center; - font-size: inherit; - -webkit-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; - right: inherit; - left: 0; -} - -.searchbox__submit::before { - display: inline-block; - margin-right: -4px; - height: 100%; - vertical-align: middle; - content: ''; -} - -.searchbox__submit:hover, .searchbox__submit:active { - cursor: pointer; -} - -.searchbox__submit:focus { - outline: 0; -} - -.searchbox__submit svg { - width: 14px; - height: 14px; - vertical-align: middle; - fill: #6d7e96; -} - -.searchbox__reset { - display: block; - position: absolute; - top: 8px; - right: 8px; - margin: 0; - border: 0; - background: none; - cursor: pointer; - padding: 0; - font-size: inherit; - -webkit-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; - fill: rgba(0, 0, 0, 0.5); -} - -.searchbox__reset.hide { - display: none; -} - -.searchbox__reset:focus { - outline: 0; -} - -.searchbox__reset svg { - display: block; - margin: 4px; - width: 8px; - height: 8px; -} - -.searchbox__input:valid ~ .searchbox__reset { - display: block; - 
-webkit-animation-name: sbx-reset-in; - animation-name: sbx-reset-in; - -webkit-animation-duration: 0.15s; - animation-duration: 0.15s; -} - -@-webkit-keyframes sbx-reset-in { - 0% { - -webkit-transform: translate3d(-20%, 0, 0); - transform: translate3d(-20%, 0, 0); - opacity: 0; - } - 100% { - -webkit-transform: none; - transform: none; - opacity: 1; - } -} - -@keyframes sbx-reset-in { - 0% { - -webkit-transform: translate3d(-20%, 0, 0); - transform: translate3d(-20%, 0, 0); - opacity: 0; - } - 100% { - -webkit-transform: none; - transform: none; - opacity: 1; - } -} - -.algolia-autocomplete.algolia-autocomplete-right .ds-dropdown-menu { - right: 0 !important; - left: inherit !important; -} - -.algolia-autocomplete.algolia-autocomplete-right .ds-dropdown-menu:before { - right: 48px; -} - -.algolia-autocomplete.algolia-autocomplete-left .ds-dropdown-menu { - left: 0 !important; - right: inherit !important; -} - -.algolia-autocomplete.algolia-autocomplete-left .ds-dropdown-menu:before { - left: 48px; -} - -.algolia-autocomplete .ds-dropdown-menu { - position: relative; - top: -6px; - border-radius: 4px; - margin: 6px 0 0; - padding: 0; - text-align: left; - height: auto; - position: relative; - background: transparent; - border: none; - z-index: 999; - max-width: 600px; - min-width: 500px; - box-shadow: 0 1px 0 0 rgba(0, 0, 0, 0.2), 0 2px 3px 0 rgba(0, 0, 0, 0.1); -} - -.algolia-autocomplete .ds-dropdown-menu:before { - display: block; - position: absolute; - content: ''; - width: 14px; - height: 14px; - background: #fff; - z-index: 1000; - top: -7px; - border-top: 1px solid #d9d9d9; - border-right: 1px solid #d9d9d9; - -webkit-transform: rotate(-45deg); - transform: rotate(-45deg); - border-radius: 2px; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - position: relative; - z-index: 1000; - margin-top: 8px; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions a:hover { - text-decoration: none; -} - -.algolia-autocomplete .ds-dropdown-menu 
.ds-suggestion { - cursor: pointer; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion.suggestion-layout-simple { - background-color: rgba(69, 142, 225, 0.05); -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion:not(.suggestion-layout-simple) .algolia-docsearch-suggestion--content { - background-color: rgba(69, 142, 225, 0.05); -} - -.algolia-autocomplete .ds-dropdown-menu [class^='ds-dataset-'] { - position: relative; - border: solid 1px #d9d9d9; - background: #fff; - border-radius: 4px; - overflow: auto; - padding: 0 8px 8px; -} - -.algolia-autocomplete .ds-dropdown-menu * { - box-sizing: border-box; -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - display: block; - position: relative; - padding: 0 8px; - background: #fff; - color: #02060c; - overflow: hidden; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #174d8c; - background: rgba(143, 187, 237, 0.1); - padding: 0.1em 0.05em; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header .algolia-docsearch-suggestion--category-header-lvl0 -.algolia-docsearch-suggestion--highlight, -.algolia-autocomplete .algolia-docsearch-suggestion--category-header .algolia-docsearch-suggestion--category-header-lvl1 -.algolia-docsearch-suggestion--highlight { - padding: 0 0 1px; - background: inherit; - box-shadow: inset 0 -2px 0 0 rgba(69, 142, 225, 0.8); - color: inherit; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - padding: 0 0 1px; - background: inherit; - box-shadow: inset 0 -2px 0 0 rgba(69, 142, 225, 0.8); - color: inherit; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - display: block; - float: right; - width: 70%; - position: relative; - padding: 5.33333px 0 5.33333px 10.66667px; - cursor: pointer; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content:before { - 
content: ''; - position: absolute; - display: block; - top: 0; - height: 100%; - width: 1px; - background: #ddd; - left: -1px; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - position: relative; - border-bottom: 1px solid #ddd; - display: none; - margin-top: 8px; - padding: 4px 0; - font-size: 1em; - color: #33363d; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - width: 100%; - float: left; - padding: 8px 0 0 0; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - float: left; - width: 30%; - padding-left: 0; - text-align: right; - position: relative; - padding: 5.33333px 10.66667px; - color: #a4a7ae; - font-size: 0.9em; - word-wrap: break-word; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column:before { - content: ''; - position: absolute; - display: block; - top: 0; - height: 100%; - width: 1px; - background: #ddd; - right: 0; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-inline { - display: none; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - margin-bottom: 4px; - color: #02060c; - font-size: 0.9em; - font-weight: bold; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - display: block; - line-height: 1.2em; - font-size: 0.85em; - color: #63676d; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--no-results { - width: 100%; - padding: 8px 0; - text-align: center; - font-size: 1.2em; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--no-results::before { - display: none; -} - -.algolia-autocomplete .algolia-docsearch-suggestion code { - padding: 1px 5px; - font-size: 90%; - border: none; - color: #222222; - background-color: #ebebeb; - border-radius: 3px; - font-family: Menlo, Monaco, Consolas, 'Courier New', monospace; -} - -.algolia-autocomplete .algolia-docsearch-suggestion code .algolia-docsearch-suggestion--highlight { - background: none; -} - -.algolia-autocomplete 
.algolia-docsearch-suggestion.algolia-docsearch-suggestion__main .algolia-docsearch-suggestion--category-header { - display: block; -} - -.algolia-autocomplete .algolia-docsearch-suggestion.algolia-docsearch-suggestion__secondary { - display: block; -} - -@media all and (min-width: 768px) { - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - } -} - -@media all and (max-width: 768px) { - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: inline-block; - width: auto; - text-align: left; - float: left; - padding: 0; - color: #02060c; - font-size: 0.9em; - font-weight: bold; - text-align: left; - opacity: 0.5; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column:before { - display: none; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column:after { - content: '|'; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--content { - display: inline-block; - width: auto; - text-align: left; - float: left; - padding: 0; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--content:before { - display: none; - } -} - -.algolia-autocomplete .suggestion-layout-simple.algolia-docsearch-suggestion { - border-bottom: solid 1px #eee; - padding: 8px; - margin: 0; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--content { - width: 100%; - padding: 0; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--content::before { - display: none; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header { - margin: 0; - padding: 0; - display: block; - width: 100%; - border: none; -} - -.algolia-autocomplete .suggestion-layout-simple 
.algolia-docsearch-suggestion--category-header-lvl0 { - opacity: 0.6; - font-size: 0.85em; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl1 { - opacity: 0.6; - font-size: 0.85em; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl1::before { - background-image: url('data:image/svg+xml;utf8,'); - content: ''; - width: 10px; - height: 10px; - display: inline-block; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--wrapper { - width: 100%; - float: left; - margin: 0; - padding: 0; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--duplicate-content, .algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--subcategory-inline { - display: none !important; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--title { - margin: 0; - color: #458ee1; - font-size: 0.9em; - font-weight: normal; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--title::before { - content: '#'; - font-weight: bold; - color: #458ee1; - display: inline-block; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--text { - margin: 4px 0 0; - display: block; - line-height: 1.4em; - padding: 5.33333px 8px; - background: #f8f8f8; - font-size: 0.85em; - opacity: 0.8; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - color: #3f4145; - font-weight: bold; - box-shadow: none; -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 134px; - height: 20px; - z-index: 2000; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml,%3Csvg width='168' height='24' xmlns='http://www.w3.org/2000/svg'%3E%3Cg 
fill='none' fill-rule='evenodd'%3E%3Cpath d='M78.988.938h16.594a2.968 2.968 0 0 1 2.966 2.966V20.5a2.967 2.967 0 0 1-2.966 2.964H78.988a2.967 2.967 0 0 1-2.966-2.964V3.897A2.961 2.961 0 0 1 78.988.938zm41.937 17.866c-4.386.02-4.386-3.54-4.386-4.106l-.007-13.336 2.675-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-10.846-2.18c.821 0 1.43-.047 1.855-.129v-2.719a6.334 6.334 0 0 0-1.574-.199c-.295 0-.596.021-.897.069a2.699 2.699 0 0 0-.814.24c-.24.116-.439.28-.582.491-.15.212-.219.335-.219.656 0 .628.219.991.616 1.23s.938.362 1.615.362zm-.233-9.7c.883 0 1.629.109 2.231.328.602.218 1.088.525 1.444.915.363.396.609.922.76 1.483.157.56.232 1.175.232 1.85v6.874c-.41.089-1.034.19-1.868.314-.834.123-1.772.185-2.813.185-.69 0-1.327-.069-1.895-.198a4.001 4.001 0 0 1-1.471-.636 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.803 0-.656.13-1.073.384-1.525.26-.45.608-.819 1.047-1.106.445-.287.95-.492 1.532-.615a8.8 8.8 0 0 1 1.82-.185 8.404 8.404 0 0 1 1.972.24v-.438c0-.307-.035-.6-.11-.874a1.88 1.88 0 0 0-.384-.73 1.784 1.784 0 0 0-.724-.493 3.164 3.164 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.735 7.735 0 0 0-1.26.307l-.321-2.192c.335-.117.834-.233 1.478-.349a10.98 10.98 0 0 1 2.073-.178zm52.842 9.626c.822 0 1.43-.048 1.854-.13V13.7a6.347 6.347 0 0 0-1.574-.199c-.294 0-.595.021-.896.069a2.7 2.7 0 0 0-.814.24 1.46 1.46 0 0 0-.582.491c-.15.212-.218.335-.218.656 0 .628.218.991.615 1.23.404.245.938.362 1.615.362zm-.226-9.694c.883 0 1.629.108 2.231.327.602.219 1.088.526 1.444.915.355.39.609.923.759 1.483.158.56.233 1.175.233 1.852v6.873c-.41.088-1.034.19-1.868.314-.834.123-1.772.184-2.813.184-.69 0-1.327-.068-1.895-.198a4.001 4.001 0 0 1-1.471-.635 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.804 0-.656.13-1.073.384-1.524.26-.45.608-.82 1.047-1.107.445-.286.95-.491 1.532-.614a8.803 8.803 0 0 1 2.751-.13c.329.034.671.096 1.04.185v-.437a3.3 3.3 0 0 0-.109-.875 1.873 1.873 0 0 0-.384-.731 1.784 1.784 0 0 0-.724-.492 3.165 3.165 0 0 0-1.143-.205c-.616 
0-1.177.075-1.69.164-.514.089-.938.191-1.26.307l-.321-2.193c.335-.116.834-.232 1.478-.348a11.633 11.633 0 0 1 2.073-.177zm-8.034-1.271a1.626 1.626 0 0 1-1.628-1.62c0-.895.725-1.62 1.628-1.62.904 0 1.63.725 1.63 1.62 0 .895-.733 1.62-1.63 1.62zm1.348 13.22h-2.689V7.27l2.69-.423v11.956zm-4.714 0c-4.386.02-4.386-3.54-4.386-4.107l-.008-13.336 2.676-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-8.698-5.903c0-1.156-.253-2.119-.746-2.788-.493-.677-1.183-1.01-2.067-1.01-.882 0-1.574.333-2.065 1.01-.493.676-.733 1.632-.733 2.788 0 1.168.246 1.953.74 2.63.492.683 1.183 1.018 2.066 1.018.882 0 1.574-.342 2.067-1.019.492-.683.738-1.46.738-2.63zm2.737-.007c0 .902-.13 1.584-.397 2.33a5.52 5.52 0 0 1-1.128 1.906 4.986 4.986 0 0 1-1.752 1.223c-.685.286-1.739.45-2.265.45-.528-.006-1.574-.157-2.252-.45a5.096 5.096 0 0 1-1.744-1.223c-.487-.527-.863-1.162-1.137-1.906a6.345 6.345 0 0 1-.41-2.33c0-.902.123-1.77.397-2.508a5.554 5.554 0 0 1 1.15-1.892 5.133 5.133 0 0 1 1.75-1.216c.679-.287 1.425-.423 2.232-.423.808 0 1.553.142 2.237.423.685.286 1.274.69 1.753 1.216a5.644 5.644 0 0 1 1.135 1.892c.287.738.431 1.606.431 2.508zm-20.138 0c0 1.12.246 2.363.738 2.882.493.52 1.13.78 1.91.78.424 0 .828-.062 1.204-.178.377-.116.677-.253.917-.417V9.33a10.476 10.476 0 0 0-1.766-.226c-.971-.028-1.71.37-2.23 1.004-.513.636-.773 1.75-.773 2.788zm7.438 5.274c0 1.824-.466 3.156-1.404 4.004-.936.846-2.367 1.27-4.296 1.27-.705 0-2.17-.137-3.34-.396l.431-2.118c.98.205 2.272.26 2.95.26 1.074 0 1.84-.219 2.299-.656.459-.437.684-1.086.684-1.948v-.437a8.07 8.07 0 0 1-1.047.397c-.43.13-.93.198-1.492.198-.739 0-1.41-.116-2.018-.349a4.206 4.206 0 0 1-1.567-1.025c-.431-.45-.774-1.017-1.013-1.694-.24-.677-.363-1.885-.363-2.773 0-.834.13-1.88.384-2.577.26-.696.629-1.298 1.129-1.796.493-.498 1.095-.881 1.8-1.162a6.605 6.605 0 0 1 2.428-.457c.87 0 1.67.109 2.45.24.78.129 1.444.265 1.985.415V18.17z' fill='%235468FF'/%3E%3Cpath d='M6.972 6.677v1.627c-.712-.446-1.52-.67-2.425-.67-.585 0-1.045.13-1.38.391a1.24 1.24 0 0 
0-.502 1.03c0 .425.164.765.494 1.02.33.256.835.532 1.516.83.447.192.795.356 1.045.495.25.138.537.332.862.582.324.25.563.548.718.894.154.345.23.741.23 1.188 0 .947-.334 1.691-1.004 2.234-.67.542-1.537.814-2.601.814-1.18 0-2.16-.229-2.936-.686v-1.708c.84.628 1.814.942 2.92.942.585 0 1.048-.136 1.388-.407.34-.271.51-.646.51-1.125 0-.287-.1-.55-.302-.79-.203-.24-.42-.42-.655-.542-.234-.123-.585-.29-1.053-.503-.276-.127-.47-.218-.582-.271a13.67 13.67 0 0 1-.55-.287 4.275 4.275 0 0 1-.567-.351 6.92 6.92 0 0 1-.455-.4c-.18-.17-.31-.34-.39-.51-.08-.17-.155-.37-.224-.598a2.553 2.553 0 0 1-.104-.742c0-.915.333-1.638.998-2.17.664-.532 1.523-.798 2.576-.798.968 0 1.793.17 2.473.51zm7.468 5.696v-.287c-.022-.607-.187-1.088-.495-1.444-.309-.357-.75-.535-1.324-.535-.532 0-.99.194-1.373.583-.382.388-.622.949-.717 1.683h3.909zm1.005 2.792v1.404c-.596.34-1.383.51-2.362.51-1.255 0-2.255-.377-3-1.132-.744-.755-1.116-1.744-1.116-2.968 0-1.297.34-2.316 1.021-3.055.68-.74 1.548-1.11 2.6-1.11 1.033 0 1.852.323 2.458.966.606.644.91 1.572.91 2.784 0 .33-.033.676-.096 1.038h-5.314c.107.702.405 1.239.894 1.611.49.372 1.106.558 1.85.558.862 0 1.58-.202 2.155-.606zm6.605-1.77h-1.212c-.596 0-1.045.116-1.349.35-.303.234-.454.532-.454.894 0 .372.117.664.35.877.235.213.575.32 1.022.32.51 0 .912-.142 1.204-.424.293-.281.44-.651.44-1.108v-.91zm-4.068-2.554V9.325c.627-.361 1.457-.542 2.489-.542 2.116 0 3.175 1.026 3.175 3.08V17h-1.548v-.957c-.415.68-1.143 1.02-2.186 1.02-.766 0-1.38-.22-1.843-.661-.462-.442-.694-1.003-.694-1.684 0-.776.293-1.38.878-1.81.585-.431 1.404-.647 2.457-.647h1.34V11.8c0-.554-.133-.971-.399-1.253-.266-.282-.707-.423-1.324-.423a4.07 4.07 0 0 0-2.345.718zm9.333-1.93v1.42c.394-1 1.101-1.5 2.123-1.5.148 0 .313.016.494.048v1.531a1.885 1.885 0 0 0-.75-.143c-.542 0-.989.24-1.34.718-.351.479-.527 1.048-.527 1.707V17h-1.563V8.91h1.563zm5.01 4.084c.022.82.272 1.492.75 2.019.479.526 1.15.79 2.01.79.639 0 1.235-.176 1.788-.527v1.404c-.521.319-1.186.479-1.995.479-1.265 
0-2.276-.4-3.031-1.197-.755-.798-1.133-1.792-1.133-2.984 0-1.16.38-2.151 1.14-2.975.761-.825 1.79-1.237 3.088-1.237.702 0 1.346.149 1.93.447v1.436a3.242 3.242 0 0 0-1.77-.495c-.84 0-1.513.266-2.019.798-.505.532-.758 1.213-.758 2.042zM40.24 5.72v4.579c.458-1 1.293-1.5 2.505-1.5.787 0 1.42.245 1.899.734.479.49.718 1.17.718 2.042V17h-1.564v-5.106c0-.553-.14-.98-.422-1.284-.282-.303-.652-.455-1.11-.455-.531 0-1.002.202-1.411.606-.41.405-.615 1.022-.615 1.851V17h-1.563V5.72h1.563zm14.966 10.02c.596 0 1.096-.253 1.5-.758.404-.506.606-1.157.606-1.955 0-.915-.202-1.62-.606-2.114-.404-.495-.92-.742-1.548-.742-.553 0-1.05.224-1.491.67-.442.447-.662 1.133-.662 2.058 0 .958.212 1.67.638 2.138.425.469.946.703 1.563.703zM53.004 5.72v4.42c.574-.894 1.388-1.341 2.44-1.341 1.022 0 1.857.383 2.506 1.149.649.766.973 1.781.973 3.047 0 1.138-.309 2.109-.925 2.912-.617.803-1.463 1.205-2.537 1.205-1.075 0-1.894-.447-2.457-1.34V17h-1.58V5.72h1.58zm9.908 11.104l-3.223-7.913h1.739l1.005 2.632 1.26 3.415c.096-.32.48-1.458 1.15-3.415l.909-2.632h1.66l-2.92 7.866c-.777 2.074-1.963 3.11-3.559 3.11a2.92 2.92 0 0 1-.734-.079v-1.34c.17.042.351.064.543.064 1.032 0 1.755-.57 2.17-1.708z' fill='%235D6494'/%3E%3Cpath d='M89.632 5.967v-.772a.978.978 0 0 0-.978-.977h-2.28a.978.978 0 0 0-.978.977v.793c0 .088.082.15.171.13a7.127 7.127 0 0 1 1.984-.28c.65 0 1.295.088 1.917.259.082.02.164-.04.164-.13m-6.248 1.01l-.39-.389a.977.977 0 0 0-1.382 0l-.465.465a.973.973 0 0 0 0 1.38l.383.383c.062.061.15.047.205-.014.226-.307.472-.601.746-.874.281-.28.568-.526.883-.751.068-.042.075-.137.02-.2m4.16 2.453v3.341c0 .096.104.165.192.117l2.97-1.537c.068-.034.089-.117.055-.184a3.695 3.695 0 0 0-3.08-1.866c-.068 0-.136.054-.136.13m0 8.048a4.489 4.489 0 0 1-4.49-4.482 4.488 4.488 0 0 1 4.49-4.482 4.488 4.488 0 0 1 4.489 4.482 4.484 4.484 0 0 1-4.49 4.482m0-10.85a6.363 6.363 0 1 0 0 12.729c3.518 0 6.372-2.85 6.372-6.368a6.358 6.358 0 0 0-6.371-6.36' fill='%23FFF'/%3E%3C/g%3E%3C/svg%3E%0A"); - background-repeat: no-repeat; - 
background-position: center; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - padding: 0 !important; - width: 100%; - height: 100%; - display: block; -} - -/*# sourceMappingURL=data:application/json;base64, */ \ No newline at end of file diff --git a/website/css/highlight.css b/website/css/highlight.css index 52f65bfc74e..e69de29bb2d 100644 --- a/website/css/highlight.css +++ b/website/css/highlight.css @@ -1,76 +0,0 @@ -/* - Name: Base16 Eighties Dark - Author: Chris Kempson (http://chriskempson.com) - Pygments template by Jan T. Sott (https://github.com/idleberg) - Created with Base16 Builder by Chris Kempson (https://github.com/chriskempson/base16-builder) -*/ - -@media (prefers-color-scheme: dark) { - -.syntax .hll { background-color: #515151 } -.syntax { background: #2d2d2d; color: #f2f0ec } -.syntax .c { color: #747369 } /* Comment */ -.syntax .err { color: #f2777a } /* Error */ -.syntax .k { color: #cc99cc } /* Keyword */ -.syntax .l { color: #f99157 } /* Literal */ -.syntax .n { color: #f2f0ec } /* Name */ -.syntax .o { color: #66cccc } /* Operator */ -.syntax .p { color: #f2f0ec } /* Punctuation */ -.syntax .cm { color: #747369 } /* Comment.Multiline */ -.syntax .cp { color: #747369 } /* Comment.Preproc */ -.syntax .c1 { color: #747369 } /* Comment.Single */ -.syntax .cs { color: #747369 } /* Comment.Special */ -.syntax .gd { color: #f2777a } /* Generic.Deleted */ -.syntax .ge { font-style: italic } /* Generic.Emph */ -.syntax .gh { color: #f2f0ec; font-weight: bold } /* Generic.Heading */ -.syntax .gi { color: #99cc99 } /* Generic.Inserted */ -.syntax .gp { color: #747369; font-weight: bold } /* Generic.Prompt */ -.syntax .gs { font-weight: bold } /* Generic.Strong */ -.syntax .gu { color: #66cccc; font-weight: bold } /* Generic.Subheading */ -.syntax .kc { color: #cc99cc } /* Keyword.Constant */ -.syntax .kd { color: #cc99cc } /* Keyword.Declaration */ -.syntax .kn { color: #66cccc } /* Keyword.Namespace */ -.syntax .kp { color: 
#cc99cc } /* Keyword.Pseudo */ -.syntax .kr { color: #cc99cc } /* Keyword.Reserved */ -.syntax .kt { color: #ffcc66 } /* Keyword.Type */ -.syntax .ld { color: #99cc99 } /* Literal.Date */ -.syntax .m { color: #f99157 } /* Literal.Number */ -.syntax .s { color: #99cc99 } /* Literal.String */ -.syntax .na { color: #6699cc } /* Name.Attribute */ -.syntax .nb { color: #f2f0ec } /* Name.Builtin */ -.syntax .nc { color: #ffcc66 } /* Name.Class */ -.syntax .no { color: #f2777a } /* Name.Constant */ -.syntax .nd { color: #66cccc } /* Name.Decorator */ -.syntax .ni { color: #f2f0ec } /* Name.Entity */ -.syntax .ne { color: #f2777a } /* Name.Exception */ -.syntax .nf { color: #6699cc } /* Name.Function */ -.syntax .nl { color: #f2f0ec } /* Name.Label */ -.syntax .nn { color: #ffcc66 } /* Name.Namespace */ -.syntax .nx { color: #6699cc } /* Name.Other */ -.syntax .py { color: #f2f0ec } /* Name.Property */ -.syntax .nt { color: #66cccc } /* Name.Tag */ -.syntax .nv { color: #f2777a } /* Name.Variable */ -.syntax .ow { color: #66cccc } /* Operator.Word */ -.syntax .w { color: #f2f0ec } /* Text.Whitespace */ -.syntax .mf { color: #f99157 } /* Literal.Number.Float */ -.syntax .mh { color: #f99157 } /* Literal.Number.Hex */ -.syntax .mi { color: #f99157 } /* Literal.Number.Integer */ -.syntax .mo { color: #f99157 } /* Literal.Number.Oct */ -.syntax .sb { color: #99cc99 } /* Literal.String.Backtick */ -.syntax .sc { color: #f2f0ec } /* Literal.String.Char */ -.syntax .sd { color: #747369 } /* Literal.String.Doc */ -.syntax .s2 { color: #99cc99 } /* Literal.String.Double */ -.syntax .se { color: #f99157 } /* Literal.String.Escape */ -.syntax .sh { color: #99cc99 } /* Literal.String.Heredoc */ -.syntax .si { color: #f99157 } /* Literal.String.Interpol */ -.syntax .sx { color: #99cc99 } /* Literal.String.Other */ -.syntax .sr { color: #99cc99 } /* Literal.String.Regex */ -.syntax .s1 { color: #99cc99 } /* Literal.String.Single */ -.syntax .ss { color: #99cc99 } /* 
Literal.String.Symbol */ -.syntax .bp { color: #f2f0ec } /* Name.Builtin.Pseudo */ -.syntax .vc { color: #f2777a } /* Name.Variable.Class */ -.syntax .vg { color: #f2777a } /* Name.Variable.Global */ -.syntax .vi { color: #f2777a } /* Name.Variable.Instance */ -.syntax .il { color: #f99157 } /* Literal.Number.Integer.Long */ - -} diff --git a/website/css/main.css b/website/css/main.css index cd8ac0a8810..1b4f7c48830 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow 
.2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px 
rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and 
(min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy,.footer-links{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form 
.btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px 
solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}@media screen and (min-width:616px){.navbar.py-1+div .anchor-fixer :target{scroll-margin-top:62px}}@media screen and (min-width:616px){.navbar.py-2+div .anchor-fixer :target{scroll-margin-top:78px}}@media screen and (min-width:616px){.navbar.py-3+div .anchor-fixer :target{scroll-margin-top:94px}}@media screen and 
(min-width:616px){.navbar.py-4+div .anchor-fixer :target{scroll-margin-top:110px}}@media screen and (min-width:616px){.navbar.py-5+div .anchor-fixer :target{scroll-margin-top:126px}}@media screen and (min-width:616px){.navbar.py-6+div .anchor-fixer :target{scroll-margin-top:142px}}@media screen and (min-width:616px){.navbar.py-7+div .anchor-fixer :target{scroll-margin-top:158px}}@media screen and (min-width:616px){.navbar.py-8+div .anchor-fixer :target{scroll-margin-top:174px}}@media screen and (max-width:615.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:73px}}@media screen and (max-width:399.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind 
Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.severity-table th{background:#f1f6f9;font-size:.875rem;padding:8px 16px}.severity-table td{border-top:1px solid #d6dbdf;padding:16px}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 
20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul 
li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file +@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 
24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 
0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 
80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and 
(min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy,.footer-links{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form 
.btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px 
solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}@media screen and (min-width:616px){.navbar.py-1+div .anchor-fixer :target{scroll-margin-top:62px}}@media screen and (min-width:616px){.navbar.py-2+div .anchor-fixer :target{scroll-margin-top:78px}}@media screen and (min-width:616px){.navbar.py-3+div .anchor-fixer :target{scroll-margin-top:94px}}@media screen and 
(min-width:616px){.navbar.py-4+div .anchor-fixer :target{scroll-margin-top:110px}}@media screen and (min-width:616px){.navbar.py-5+div .anchor-fixer :target{scroll-margin-top:126px}}@media screen and (min-width:616px){.navbar.py-6+div .anchor-fixer :target{scroll-margin-top:142px}}@media screen and (min-width:616px){.navbar.py-7+div .anchor-fixer :target{scroll-margin-top:158px}}@media screen and (min-width:616px){.navbar.py-8+div .anchor-fixer :target{scroll-margin-top:174px}}@media screen and (max-width:615.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:73px}}@media screen and (max-width:399.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:80px}}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.severity-table 
th{background:#f1f6f9;font-size:.875rem;padding:8px 16px}.severity-table td{border-top:1px solid #d6dbdf;padding:16px}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 
20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 
93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file From ad52b9391360b74a8fe2ddfb2f3a9b2a8b8d93cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 May 2022 21:45:42 +0200 Subject: [PATCH 379/615] Remove trash --- website/benchmark/dbms/index.html | 2 +- website/benchmark/hardware/index.html | 2 +- website/benchmark/versions/index.html | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/website/benchmark/dbms/index.html b/website/benchmark/dbms/index.html index a856bbb0502..c4a700ed2df 100644 --- a/website/benchmark/dbms/index.html +++ b/website/benchmark/dbms/index.html @@ -35,7 +35,7 @@

diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index 7b68e42f451..9c9b14b56da 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -35,7 +35,7 @@

Full results

-
+
diff --git a/website/benchmark/versions/index.html b/website/benchmark/versions/index.html index da0702a04de..cce85934a9b 100644 --- a/website/benchmark/versions/index.html +++ b/website/benchmark/versions/index.html @@ -35,7 +35,7 @@

Full results

-
+
From b3ee8114d924772d2f164bca7e8badd8328dddc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 May 2022 22:33:41 +0200 Subject: [PATCH 380/615] Minor change --- utils/changelog-simple/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 utils/changelog-simple/.gitignore diff --git a/utils/changelog-simple/.gitignore b/utils/changelog-simple/.gitignore new file mode 100644 index 00000000000..78caa68e38e --- /dev/null +++ b/utils/changelog-simple/.gitignore @@ -0,0 +1,2 @@ +*.txt +*.json From 0c615798592866f43161455e99ac4db2c2031c98 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 22 May 2022 23:15:42 +0200 Subject: [PATCH 381/615] More comments --- .../ObjectStorages/DiskObjectStorage.cpp | 195 ---------------- src/Disks/ObjectStorages/DiskObjectStorage.h | 65 +----- .../DiskObjectStorageMetadata.cpp | 208 ++++++++++++++++++ .../DiskObjectStorageMetadata.h | 68 ++++++ .../DiskObjectStorageMetadataHelper.cpp | 1 + .../DiskObjectStorageMetadataHelper.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- 7 files changed, 286 insertions(+), 255 deletions(-) create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageMetadata.h diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index b1a396ffee6..163e2087bed 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -25,12 +25,8 @@ namespace ErrorCodes extern const int INCORRECT_DISK_INDEX; extern const int UNKNOWN_FORMAT; extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; - extern const int MEMORY_LIMIT_EXCEEDED; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; } static String revisionToString(UInt64 revision) @@ -107,197 +103,6 @@ DiskObjectStorage::DiskObjectStorage( , 
metadata_helper(std::make_unique(this, ReadSettings{})) {} -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - return result; -} - - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.save(sync); - return result; -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - return result; -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - updater(result); - result.save(sync); - return result; -} - -DiskObjectStorage::Metadata DiskObjectStorage::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorage::MetadataUpdater updater) -{ - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - result.load(); - if (updater(result)) - result.save(sync); - metadata_disk_->removeFile(metadata_file_path_); - - return result; - -} - -DiskObjectStorage::Metadata 
DiskObjectStorage::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) -{ - if (overwrite || !metadata_disk_->exists(metadata_file_path_)) - { - return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); - } - else - { - auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); - if (result.read_only) - throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); - return result; - } -} - -void DiskObjectStorage::Metadata::load() -{ - try - { - const ReadSettings read_settings; - auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ - - UInt32 version; - readIntText(version, *buf); - - if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT, - "Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}", - metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); - - assertChar('\n', *buf); - - UInt32 remote_fs_objects_count; - readIntText(remote_fs_objects_count, *buf); - assertChar('\t', *buf); - readIntText(total_size, *buf); - assertChar('\n', *buf); - remote_fs_objects.resize(remote_fs_objects_count); - - for (size_t i = 0; i < remote_fs_objects_count; ++i) - { - String remote_fs_object_path; - size_t remote_fs_object_size; - readIntText(remote_fs_object_size, *buf); - assertChar('\t', *buf); - readEscapedString(remote_fs_object_path, *buf); - if (version == VERSION_ABSOLUTE_PATHS) - { - if (!remote_fs_object_path.starts_with(remote_fs_root_path)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, - "Path in metadata does not correspond to root path. 
Path: {}, root path: {}, disk path: {}", - remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); - - remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); - } - assertChar('\n', *buf); - remote_fs_objects[i].relative_path = remote_fs_object_path; - remote_fs_objects[i].bytes_size = remote_fs_object_size; - } - - readIntText(ref_count, *buf); - assertChar('\n', *buf); - - if (version >= VERSION_READ_ONLY_FLAG) - { - readBoolText(read_only, *buf); - assertChar('\n', *buf); - } - } - catch (Exception & e) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - - if (e.code() == ErrorCodes::UNKNOWN_FORMAT) - throw; - - if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) - throw; - - throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT); - } -} - -/// Load metadata by path or create empty if `create` flag is set. -DiskObjectStorage::Metadata::Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_) - : remote_fs_root_path(remote_fs_root_path_) - , metadata_file_path(metadata_file_path_) - , metadata_disk(metadata_disk_) - , total_size(0), ref_count(0) -{ -} - -void DiskObjectStorage::Metadata::addObject(const String & path, size_t size) -{ - total_size += size; - remote_fs_objects.emplace_back(path, size); -} - - -void DiskObjectStorage::Metadata::saveToBuffer(WriteBuffer & buf, bool sync) -{ - writeIntText(VERSION_RELATIVE_PATHS, buf); - writeChar('\n', buf); - - writeIntText(remote_fs_objects.size(), buf); - writeChar('\t', buf); - writeIntText(total_size, buf); - writeChar('\n', buf); - - for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) - { - writeIntText(remote_fs_object_size, buf); - writeChar('\t', buf); - writeEscapedString(remote_fs_object_path, buf); - writeChar('\n', buf); - } - - writeIntText(ref_count, buf); - writeChar('\n', buf); - - writeBoolText(read_only, buf); - 
writeChar('\n', buf); - - buf.finalize(); - if (sync) - buf.sync(); - -} - -/// Fsync metadata file if 'sync' flag is set. -void DiskObjectStorage::Metadata::save(bool sync) -{ - auto buf = metadata_disk->writeFile(metadata_file_path, 1024); - saveToBuffer(*buf, sync); -} - -std::string DiskObjectStorage::Metadata::serializeToString() -{ - WriteBufferFromOwnString write_buf; - saveToBuffer(write_buf, false); - return write_buf.str(); -} - DiskObjectStorage::Metadata DiskObjectStorage::readMetadataUnlocked(const String & path, std::shared_lock &) const { return Metadata::readMetadata(remote_fs_root_path, metadata_disk, path); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index e7cbb04ff99..9a60a7ad25e 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace CurrentMetrics @@ -13,7 +14,11 @@ namespace CurrentMetrics namespace DB { - +/// Disk build on top of IObjectStorage. Use additional disk (local for example) +/// for metadata storage. Metadata is a small files with mapping from local paths to +/// objects in object storage, like: +/// "/var/lib/clickhouse/data/db/table/all_0_0_0/columns.txt" -> /xxxxxxxxxxxxxxxxxxxx +/// -> /yyyyyyyyyyyyyyyyyyyy class DiskObjectStorage : public IDisk { @@ -37,7 +42,7 @@ public: bool supportParallelWrite() const override { return true; } - struct Metadata; + using Metadata = DiskObjectStorageMetadata; using MetadataUpdater = std::function; const String & getName() const override { return name; } @@ -192,62 +197,6 @@ private: std::unique_ptr metadata_helper; }; -struct DiskObjectStorage::Metadata -{ - using Updater = std::function; - /// Metadata file version. 
- static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; - static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; - static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; - - /// Remote FS objects paths and their sizes. - std::vector remote_fs_objects; - - /// URI - const String & remote_fs_root_path; - - /// Relative path to metadata file on local FS. - const String metadata_file_path; - - DiskPtr metadata_disk; - - /// Total size of all remote FS (S3, HDFS) objects. - size_t total_size = 0; - - /// Number of references (hardlinks) to this metadata file. - /// - /// FIXME: Why we are tracking it explicetly, without - /// info from filesystem???? - UInt32 ref_count = 0; - - /// Flag indicates that file is read only. - bool read_only = false; - - Metadata( - const String & remote_fs_root_path_, - DiskPtr metadata_disk_, - const String & metadata_file_path_); - - void addObject(const String & path, size_t size); - - static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); - static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - - static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); - static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); - static Metadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); - - /// Serialize metadata to string (very same with saveToBuffer) - std::string serializeToString(); - 
-private: - /// Fsync metadata file if 'sync' flag is set. - void save(bool sync = false); - void saveToBuffer(WriteBuffer & buffer, bool sync); - void load(); -}; - class DiskObjectStorageReservation final : public IReservation { public: diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp new file mode 100644 index 00000000000..2e1ef31f8f0 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -0,0 +1,208 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_FORMAT; + extern const int PATH_ACCESS_DENIED; + extern const int MEMORY_LIMIT_EXCEEDED; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + return result; +} + + +DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.save(sync); + return result; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + return result; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater) +{ + 
DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + updater(result); + result.save(sync); + return result; +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater) +{ + DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + result.load(); + if (updater(result)) + result.save(sync); + metadata_disk_->removeFile(metadata_file_path_); + + return result; + +} + +DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite) +{ + if (overwrite || !metadata_disk_->exists(metadata_file_path_)) + { + return createAndStoreMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_, sync); + } + else + { + auto result = readMetadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_); + if (result.read_only) + throw Exception("File is read-only: " + metadata_file_path_, ErrorCodes::PATH_ACCESS_DENIED); + return result; + } +} + +void DiskObjectStorageMetadata::load() +{ + try + { + const ReadSettings read_settings; + auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */ + + UInt32 version; + readIntText(version, *buf); + + if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT, + "Unknown metadata file version. Path: {}. Version: {}. 
Maximum expected version: {}", + metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG)); + + assertChar('\n', *buf); + + UInt32 remote_fs_objects_count; + readIntText(remote_fs_objects_count, *buf); + assertChar('\t', *buf); + readIntText(total_size, *buf); + assertChar('\n', *buf); + remote_fs_objects.resize(remote_fs_objects_count); + + for (size_t i = 0; i < remote_fs_objects_count; ++i) + { + String remote_fs_object_path; + size_t remote_fs_object_size; + readIntText(remote_fs_object_size, *buf); + assertChar('\t', *buf); + readEscapedString(remote_fs_object_path, *buf); + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!remote_fs_object_path.starts_with(remote_fs_root_path)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, + "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", + remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath()); + + remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); + } + assertChar('\n', *buf); + remote_fs_objects[i].relative_path = remote_fs_object_path; + remote_fs_objects[i].bytes_size = remote_fs_object_size; + } + + readIntText(ref_count, *buf); + assertChar('\n', *buf); + + if (version >= VERSION_READ_ONLY_FLAG) + { + readBoolText(read_only, *buf); + assertChar('\n', *buf); + } + } + catch (Exception & e) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) + throw; + + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + throw; + + throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT); + } +} + +/// Load metadata by path or create empty if `create` flag is set. 
+DiskObjectStorageMetadata::DiskObjectStorageMetadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_) + : remote_fs_root_path(remote_fs_root_path_) + , metadata_file_path(metadata_file_path_) + , metadata_disk(metadata_disk_) + , total_size(0), ref_count(0) +{ +} + +void DiskObjectStorageMetadata::addObject(const String & path, size_t size) +{ + total_size += size; + remote_fs_objects.emplace_back(path, size); +} + + +void DiskObjectStorageMetadata::saveToBuffer(WriteBuffer & buf, bool sync) +{ + writeIntText(VERSION_RELATIVE_PATHS, buf); + writeChar('\n', buf); + + writeIntText(remote_fs_objects.size(), buf); + writeChar('\t', buf); + writeIntText(total_size, buf); + writeChar('\n', buf); + + for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) + { + writeIntText(remote_fs_object_size, buf); + writeChar('\t', buf); + writeEscapedString(remote_fs_object_path, buf); + writeChar('\n', buf); + } + + writeIntText(ref_count, buf); + writeChar('\n', buf); + + writeBoolText(read_only, buf); + writeChar('\n', buf); + + buf.finalize(); + if (sync) + buf.sync(); + +} + +/// Fsync metadata file if 'sync' flag is set. 
+void DiskObjectStorageMetadata::save(bool sync) +{ + auto buf = metadata_disk->writeFile(metadata_file_path, 1024); + saveToBuffer(*buf, sync); +} + +std::string DiskObjectStorageMetadata::serializeToString() +{ + WriteBufferFromOwnString write_buf; + saveToBuffer(write_buf, false); + return write_buf.str(); +} + + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h new file mode 100644 index 00000000000..ba6b7f952fc --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -0,0 +1,68 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Metadata for DiskObjectStorage, stored on local disk +struct DiskObjectStorageMetadata +{ + using Updater = std::function; + /// Metadata file version. + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; + static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; + + /// Remote FS objects paths and their sizes. + std::vector remote_fs_objects; + + /// URI + const String & remote_fs_root_path; + + /// Relative path to metadata file on local FS. + const String metadata_file_path; + + DiskPtr metadata_disk; + + /// Total size of all remote FS (S3, HDFS) objects. + size_t total_size = 0; + + /// Number of references (hardlinks) to this metadata file. + /// + /// FIXME: Why we are tracking it explicetly, without + /// info from filesystem???? + UInt32 ref_count = 0; + + /// Flag indicates that file is read only. 
+ bool read_only = false; + + DiskObjectStorageMetadata( + const String & remote_fs_root_path_, + DiskPtr metadata_disk_, + const String & metadata_file_path_); + + void addObject(const String & path, size_t size); + + static DiskObjectStorageMetadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_); + static DiskObjectStorageMetadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static DiskObjectStorageMetadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + + static DiskObjectStorageMetadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync); + static DiskObjectStorageMetadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater); + static DiskObjectStorageMetadata createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite); + + /// Serialize metadata to string (very same with saveToBuffer) + std::string serializeToString(); + +private: + /// Fsync metadata file if 'sync' flag is set. 
+ void save(bool sync = false); + void saveToBuffer(WriteBuffer & buffer, bool sync); + void load(); +}; + +using DiskObjectStorageMetadataUpdater = std::function; + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp index 8e680663358..a7e34f7ccd4 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes { extern const int UNKNOWN_FORMAT; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } static String revisionToString(UInt64 revision) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 2d8775030e5..43c402a4508 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -10,7 +10,7 @@ class DiskObjectStorage; /// Class implements storage of ObjectStorage metadata inside object storage itself, /// so it's possible to recover from this remote information in case of local disk loss. /// -/// This machanism can be enabled with `true` option inside +/// This mechanism can be enabled with `true` option inside /// disk configuration. Implemented only for S3 and Azure Block storage. Other object storages /// doesn't support metadata for blobs. /// diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index ebf3ba54324..b96ddeb2de1 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -65,7 +65,7 @@ public: /// Path exists or not virtual bool exists(const std::string & path) const = 0; - /// List on prefix, return childs with their sizes. + /// List on prefix, return children with their sizes. 
virtual void listPrefix(const std::string & path, BlobsPathToSize & children) const = 0; /// Get object metadata if supported. It should be possible to receive From c478f2c21a9bf6c5fa39bda108e67ac4f9e6d7c9 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 22 May 2022 20:13:43 -0300 Subject: [PATCH 382/615] Update uuid-functions.md --- .../sql-reference/functions/uuid-functions.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/uuid-functions.md b/docs/ru/sql-reference/functions/uuid-functions.md index babeb0d2693..554e78002b8 100644 --- a/docs/ru/sql-reference/functions/uuid-functions.md +++ b/docs/ru/sql-reference/functions/uuid-functions.md @@ -9,10 +9,16 @@ sidebar_label: "Функции для работы с UUID" Генерирует идентификатор [UUID версии 4](https://tools.ietf.org/html/rfc4122#section-4.4). +**Синтаксис** + ``` sql -generateUUIDv4() +generateUUIDv4([x]) ``` +**Аргументы** + +- `x` — [выражение](../syntax.md#syntax-expressions), возвращающее значение одного из [поддерживаемых типов данных](../data-types/index.md#data_types). Значение используется, чтобы избежать [склейки одинаковых выражений](index.md#common-subexpression-elimination), если функция вызывается несколько раз в одном запросе. Необязательный параметр. + **Возвращаемое значение** Значение типа [UUID](../../sql-reference/functions/uuid-functions.md). @@ -35,6 +41,15 @@ SELECT * FROM t_uuid └──────────────────────────────────────┘ ``` +**Пример использования, для генерации нескольких значений в одной строке** + +```sql +SELECT generateUUIDv4(1), generateUUIDv4(2) +┌─generateUUIDv4(1)────────────────────┬─generateUUIDv4(2)────────────────────┐ +│ 2d49dc6e-ddce-4cd0-afb8-790956df54c1 │ 8abf8c13-7dea-4fdf-af3e-0e18767770e6 │ +└──────────────────────────────────────┴──────────────────────────────────────┘ +``` + ## empty {#empty} Проверяет, является ли входной UUID пустым. 
From 5984d9993dc005acbc51918593ecc688c41af8e4 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 22 May 2022 20:17:45 -0300 Subject: [PATCH 383/615] Update uuid-functions.md --- .../sql-reference/functions/uuid-functions.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d23b505a93f..c2b5f761844 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -11,10 +11,16 @@ The functions for working with UUID are listed below. Generates the [UUID](../data-types/uuid.md) of [version 4](https://tools.ietf.org/html/rfc4122#section-4.4). +**Syntax** + ``` sql -generateUUIDv4() +generateUUIDv4([x]) ``` +**Arguments** + +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. + **Returned value** The UUID type value. @@ -37,6 +43,15 @@ SELECT * FROM t_uuid └──────────────────────────────────────┘ ``` +**Usage example if mupliple columns needed to be generated in one row** + +```sql +SELECT generateUUIDv4(1), generateUUIDv4(2) +┌─generateUUIDv4(1)────────────────────┬─generateUUIDv4(2)────────────────────┐ +│ 2d49dc6e-ddce-4cd0-afb8-790956df54c1 │ 8abf8c13-7dea-4fdf-af3e-0e18767770e6 │ +└──────────────────────────────────────┴──────────────────────────────────────┘ +``` + ## empty {#empty} Checks whether the input UUID is empty. 
From f8e2e0f1b24f60045821d450276d84e23f4042e3 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 22 May 2022 20:24:18 -0300 Subject: [PATCH 384/615] Update uuid-functions.md --- docs/en/sql-reference/functions/uuid-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index c2b5f761844..08f281ba281 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -43,7 +43,7 @@ SELECT * FROM t_uuid └──────────────────────────────────────┘ ``` -**Usage example if mupliple columns needed to be generated in one row** +**Usage example if it is needed to generate multiple values in one row** ```sql SELECT generateUUIDv4(1), generateUUIDv4(2) From 2f93f11144e9b881affc940354d3b5b3dd7f036c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 02:03:13 +0200 Subject: [PATCH 385/615] Maybe better --- programs/CMakeLists.txt | 19 ++++- programs/install/Install.cpp | 5 +- programs/main.cpp | 80 -------------------- programs/su/CMakeLists.txt | 3 + programs/su/su.cpp | 138 +++++++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 87 deletions(-) create mode 100644 programs/su/CMakeLists.txt create mode 100644 programs/su/su.cpp diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index ad59ec20e39..77a91dfc3ae 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -62,6 +62,8 @@ option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_ option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_SU "A tool similar to 'su'" ${ENABLE_CLICKHOUSE_ALL}) + if (NOT ENABLE_NURAFT) # RECONFIGURE_MESSAGE_LEVEL should not be used here, # since ENABLE_NURAFT is set to OFF for FreeBSD and Darwin. 
@@ -237,6 +239,7 @@ add_subdirectory (install) add_subdirectory (git-import) add_subdirectory (bash-completion) add_subdirectory (static-files-disk-uploader) +add_subdirectory (su) if (ENABLE_CLICKHOUSE_KEEPER) add_subdirectory (keeper) @@ -269,7 +272,8 @@ if (CLICKHOUSE_ONE_SHARED) ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES} ${CLICKHOUSE_KEEPER_CONVERTER_SOURCES} - ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES}) + ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES} + ${CLICKHOUSE_SU_SOURCES}) target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} @@ -285,7 +289,8 @@ if (CLICKHOUSE_ONE_SHARED) ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK} ${CLICKHOUSE_KEEPER_CONVERTER_LINK} - ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK}) + ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK} + ${CLICKHOUSE_SU_LINK}) target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} @@ -319,7 +324,7 @@ if (CLICKHOUSE_SPLIT_BINARY) clickhouse-git-import clickhouse-copier clickhouse-static-files-disk-uploader - ) + clickhouse-su) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) @@ -387,6 +392,9 @@ else () if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER) clickhouse_target_link_split_lib(clickhouse static-files-disk-uploader) endif () + if (ENABLE_CLICKHOUSE_SU) + clickhouse_target_link_split_lib(clickhouse su) + endif () if (ENABLE_CLICKHOUSE_KEEPER) clickhouse_target_link_split_lib(clickhouse keeper) endif() @@ -453,6 +461,11 @@ else () install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-static-files-disk-uploader" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-static-files-disk-uploader) endif () + if (ENABLE_CLICKHOUSE_SU) + add_custom_target (clickhouse-su ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-su DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-su" DESTINATION ${CMAKE_INSTALL_BINDIR} 
COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-su) + endif () if (ENABLE_CLICKHOUSE_KEEPER) if (NOT BUILD_STANDALONE_KEEPER AND CREATE_KEEPER_SYMLINK) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 2ce742d9fec..33b4e687f85 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -925,10 +925,7 @@ namespace executable.string(), config.string(), pid_file.string()); if (!user.empty()) - { - if (0 != setenv("CLICKHOUSE_SETUID", user.c_str(), true)) - throwFromErrno("Cannot set environment variable CLICKHOUSE_SETUID that is required to dropping privileges", ErrorCodes::SYSTEM_ERROR); - } + command = fmt::format("clickhouse su '{}' {}", user, command); fmt::print("Will run {}\n", command); executeScript(command, true); diff --git a/programs/main.cpp b/programs/main.cpp index bee1496095c..5802e195bd0 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -6,10 +6,6 @@ #include #endif -#include -#include -#include - #include #include #include @@ -24,7 +20,6 @@ #include #include #include -#include #include #include @@ -34,7 +29,6 @@ namespace DB { namespace ErrorCodes { - extern const int SYSTEM_ERROR; extern const int BAD_ARGUMENTS; } } @@ -347,69 +341,6 @@ struct Checker ; -/// ClickHouse can drop privileges at startup. It is controlled by environment variables. -void setUserAndGroup() -{ - using namespace DB; - - static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw. - std::unique_ptr buf(new char[buf_size]); - - /// Set the group first, because if we set user, the privileges will be already dropped and we will not be able to set the group later. 
- - const char * env_gid = getenv("CLICKHOUSE_SETGID"); - if (env_gid && env_gid[0]) - { - gid_t gid = 0; - if (!tryParse(gid, env_gid) || gid == 0) - { - group entry{}; - group * result{}; - - if (0 != getgrnam_r(env_gid, &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); - - if (!result) - throw Exception("Group {} specified in the CLICKHOUSE_SETGID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); - - gid = entry.gr_gid; - } - - if (gid == 0) - throw Exception("Group specified in the CLICKHOUSE_SETGID environment variable has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); - - if (0 != setgid(gid)) - throwFromErrno(fmt::format("Cannot do 'setgid' to user, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); - } - - const char * env_uid = getenv("CLICKHOUSE_SETUID"); - if (env_uid && env_uid[0]) - { - /// Is it numeric id or name? 
- uid_t uid = 0; - if (!tryParse(uid, env_uid) || uid == 0) - { - passwd entry{}; - passwd * result{}; - - if (0 != getpwnam_r(env_uid, &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); - - if (!result) - throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); - - uid = entry.pw_uid; - } - - if (uid == 0) - throw Exception("User specified in the CLICKHOUSE_SETUID environment variable has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); - - if (0 != setuid(uid)) - throwFromErrno(fmt::format("Cannot do 'setuid' to user, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); - } -} - - /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. void checkHarmfulEnvironmentVariables(char ** argv) { @@ -487,17 +418,6 @@ int main(int argc_, char ** argv_) /// will work only after additional call of this function. updatePHDRCache(); - /// Drop privileges if needed. - try - { - setUserAndGroup(); - } - catch (...) 
- { - std::cerr << DB::getCurrentExceptionMessage("setUserAndGroup", false) << '\n'; - return 1; - } - checkHarmfulEnvironmentVariables(argv_); /// Reset new handler to default (that throws std::bad_alloc) diff --git a/programs/su/CMakeLists.txt b/programs/su/CMakeLists.txt new file mode 100644 index 00000000000..1187deeeea7 --- /dev/null +++ b/programs/su/CMakeLists.txt @@ -0,0 +1,3 @@ +set (CLICKHOUSE_SU_SOURCES su.cpp) +set (CLICKHOUSE_SU_LINK PRIVATE dbms) +clickhouse_program_add(su) diff --git a/programs/su/su.cpp b/programs/su/su.cpp new file mode 100644 index 00000000000..d6fcf3085ed --- /dev/null +++ b/programs/su/su.cpp @@ -0,0 +1,138 @@ +#include +#include +#include + +#include +#include +#include +#include + + +/// "su" means "set user" +/// In fact, this program can set Unix user and group. +/// +/// Usage: +/// clickhouse su user[:group] args... +/// +/// - will set user and, optionally, group and exec the remaining args. +/// user and group can be numeric identifiers or strings. +/// +/// The motivation for this tool is very obscure and idiosyncratic. It is needed for Docker. +/// People want to run programs inside Docker with dropped privileges (less than root). +/// But the standard Linux "su" program is not suitable for usage inside Docker, +/// because it is creating pseudoterminals to avoid hijacking input from the terminal, for security, +/// but Docker is also doing something with the terminal and it is incompatible. +/// For this reason, people use alternative and less "secure" versions of "su" tools like "gosu" or "su-exec". +/// But it would be very strange to use 3rd-party software only to do two-three syscalls. +/// That's why we provide this tool. +/// +/// Note: ClickHouse does not need Docker at all and works better without Docker. +/// ClickHouse has no dependencies, it is packaged and distributed in single binary. +/// There is no reason to use Docker unless you are already running all your software in Docker. 
+ +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int SYSTEM_ERROR; +} + +void setUserAndGroup(std::string_view arg_uid, std::string_view arg_gid) +{ + static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw. + std::unique_ptr buf(new char[buf_size]); + + /// Set the group first, because if we set user, the privileges will be already dropped and we will not be able to set the group later. + + if (!arg_gid.empty()) + { + gid_t gid = 0; + if (!tryParse(gid, arg_gid) || gid == 0) + { + group entry{}; + group * result{}; + + if (0 != getgrnam_r(env_gid, &entry, buf.get(), buf_size, &result)) + throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); + + if (!result) + throw Exception("Group {} specified in the CLICKHOUSE_SETGID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + + gid = entry.gr_gid; + } + + if (gid == 0) + throw Exception("Group specified in the CLICKHOUSE_SETGID environment variable has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + + if (0 != setgid(gid)) + throwFromErrno(fmt::format("Cannot do 'setgid' to user, specified in the CLICKHOUSE_SETGID environment variable ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); + } + + if (!arg_uid.empty()) + { + /// Is it numeric id or name? 
+ uid_t uid = 0; + if (!tryParse(uid, arg_uid) || uid == 0) + { + passwd entry{}; + passwd * result{}; + + if (0 != getpwnam_r(arg_uid, &entry, buf.get(), buf_size, &result)) + throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); + + if (!result) + throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + + uid = entry.pw_uid; + } + + if (uid == 0) + throw Exception("User specified in the CLICKHOUSE_SETUID environment variable has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + + if (0 != setuid(uid)) + throwFromErrno(fmt::format("Cannot do 'setuid' to user, specified in the CLICKHOUSE_SETUID environment variable ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + } +} + +} + + +int mainEntryClickHouseSU(int argc, char ** argv) +try +{ + using namespace DB; + + if (argc < 3) + { + std::cout << "Usage: ./clickhouse su user:group ..." << std::endl; + exit(0); + } + + std::string_view user_and_group = argv[1]; + + std::string_view user; + std::string_view group; + + auto pos = user_and_group.find(':'); + if (pos == std::string_view::npos) + { + user = user_and_group; + } + else + { + user = user_and_group.substr(0, pos); + group = user_and_group.substr(pos + 1); + } + + setUserAndGroup(user, group); + + execvp(argv[0], &argv[2]); + throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR); +} +catch (...) 
+{ + std::cerr << DB::getCurrentExceptionMessage(false); + return 1; +} From f614c139f4005a161873bd501ba54ead24daa395 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 02:06:03 +0200 Subject: [PATCH 386/615] Maybe better --- docker/server/entrypoint.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 996e58d09ad..8d46b70cb2c 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -90,18 +90,12 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL EOT fi -# Drop privileges -CLICKHOUSE_SETUID="${USER}" -CLICKHOUSE_SETGID="${GROUP}" -export CLICKHOUSE_SETUID -export CLICKHOUSE_SETGID - if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # port is needed to check if clickhouse-server is ready for connections HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)" # Listen only on localhost until the initialization is done - /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & + /usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 & pid="$!" 
# check if clickhouse is ready to accept connections @@ -157,7 +151,7 @@ if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then # so the container can't be finished by ctrl+c CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0} export CLICKHOUSE_WATCHDOG_ENABLE - /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@" + /usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@" fi # Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image From 5d43f4690834e4a81c5fa2b3d2a4eb954fa29399 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 03:09:28 +0300 Subject: [PATCH 387/615] Update ym-dict-functions.md --- docs/en/sql-reference/functions/ym-dict-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 20f43200a16..4fc727844e7 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -105,7 +105,7 @@ Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia Converts a region to a continent. In every other way, this function is the same as ‘regionToCity’. Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001). -### regionToTopContinent (id\[, geobase\]) {#regiontotopcontinentid-geobase} +### regionToTopContinent(id\[, geobase\]) {#regiontotopcontinentid-geobase} Finds the highest continent in the hierarchy for the region. 
From cc985d9b926d96bcd2b4d6bae6ddce9590eb8e7c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 02:30:33 +0200 Subject: [PATCH 388/615] Fix error --- programs/main.cpp | 8 -------- src/IO/ReadHelpers.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/programs/main.cpp b/programs/main.cpp index 5802e195bd0..3be624d6cd9 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -25,14 +25,6 @@ #include -namespace DB -{ - namespace ErrorCodes - { - extern const int BAD_ARGUMENTS; - } -} - /// Universal executable for various clickhouse applications #if ENABLE_CLICKHOUSE_SERVER int mainEntryClickHouseServer(int argc, char ** argv); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index c5ffa52c9b3..28fc90817d5 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -1347,6 +1349,12 @@ inline T parse(const String & s) return parse(s.data(), s.size()); } +template +inline T parse(std::string_view) +{ + return parse(s.data(), s.size()); +} + template inline bool tryParse(T & res, const char * data) { @@ -1359,6 +1367,12 @@ inline bool tryParse(T & res, const String & s) return tryParse(res, s.data(), s.size()); } +template +inline bool tryParse(T & res, std::string_view s) +{ + return tryParse(res, s.data(), s.size()); +} + /** Skip UTF-8 BOM if it is under cursor. 
* As BOM is usually located at start of stream, and buffer size is usually larger than three bytes, From c541d30a38bc890323fa5a703c3d1ae1c5d6f503 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 02:48:05 +0200 Subject: [PATCH 389/615] Fix error --- src/IO/ReadHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 28fc90817d5..32a8a870814 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1350,7 +1350,7 @@ inline T parse(const String & s) } template -inline T parse(std::string_view) +inline T parse(std::string_view s) { return parse(s.data(), s.size()); } From b432ca93a1bd463e093bc967085874c7503f1393 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 03:11:12 +0200 Subject: [PATCH 390/615] Fix error --- programs/su/su.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/su/su.cpp b/programs/su/su.cpp index d6fcf3085ed..886e20a1752 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -39,7 +39,7 @@ namespace ErrorCodes extern const int SYSTEM_ERROR; } -void setUserAndGroup(std::string_view arg_uid, std::string_view arg_gid) +void setUserAndGroup(std::string arg_uid, std::string arg_gid) { static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw. 
std::unique_ptr buf(new char[buf_size]); @@ -54,7 +54,7 @@ void setUserAndGroup(std::string_view arg_uid, std::string_view arg_gid) group entry{}; group * result{}; - if (0 != getgrnam_r(env_gid, &entry, buf.get(), buf_size, &result)) + if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result)) throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); if (!result) @@ -79,7 +79,7 @@ void setUserAndGroup(std::string_view arg_uid, std::string_view arg_gid) passwd entry{}; passwd * result{}; - if (0 != getpwnam_r(arg_uid, &entry, buf.get(), buf_size, &result)) + if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result)) throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); if (!result) From 39d157b9fb999855ea601f65155bc883596723d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 03:52:00 +0200 Subject: [PATCH 391/615] Fix error --- programs/su/su.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 886e20a1752..0b5d65d9da7 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -55,7 +55,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) group * result{}; if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", env_gid), ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); if (!result) throw Exception("Group {} specified in the CLICKHOUSE_SETGID environment variable 
is not found in the system", ErrorCodes::BAD_ARGUMENTS); @@ -80,7 +80,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) passwd * result{}; if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", env_uid), ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); if (!result) throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); From 03b118557c00f40882fb3f92fd75d7c642beb7c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 03:52:44 +0200 Subject: [PATCH 392/615] Fix error --- programs/su/su.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 0b5d65d9da7..490e966955f 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -112,8 +112,8 @@ try std::string_view user_and_group = argv[1]; - std::string_view user; - std::string_view group; + std::string user; + std::string group; auto pos = user_and_group.find(':'); if (pos == std::string_view::npos) @@ -126,7 +126,7 @@ try group = user_and_group.substr(pos + 1); } - setUserAndGroup(user, group); + setUserAndGroup(std::move(user), std::move(group)); execvp(argv[0], &argv[2]); throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR); From 89c8d136c3b4d56cd7fc158eb3c5561df3e90200 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 23 May 2022 07:20:36 +0200 Subject: [PATCH 393/615] Fix error --- programs/config_tools.h.in | 1 + programs/main.cpp | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index b97eb63b535..aafe3ae8701 100644 --- 
a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -19,3 +19,4 @@ #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER #cmakedefine01 ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER +#cmakedefine01 ENABLE_CLICKHOUSE_SU diff --git a/programs/main.cpp b/programs/main.cpp index 3be624d6cd9..e505246a908 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -65,6 +65,9 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv); #if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_SU +int mainEntryClickHouseSU(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** argv); @@ -136,6 +139,9 @@ std::pair clickhouse_applications[] = #endif #if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER {"static-files-disk-uploader", mainEntryClickHouseStaticFilesDiskUploader}, +#endif +#if ENABLE_CLICKHOUSE_SU + {"su", mainEntryClickHouseSU}, #endif {"hash-binary", mainEntryClickHouseHashBinary}, }; From e25ca139cd033049fc30f5ab8dc972732bafb993 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 16 May 2022 22:37:31 +0200 Subject: [PATCH 394/615] Implement SQL functions (NOT) (I)LIKE() + MATCH() with non-const needles With this commit, SQL functions LIKE and MATCH and their variants can work with non-const needle arguments. E.g. create table tab (id UInt32, haystack String, needle String) engine = MergeTree() order by id; insert into tab values (1, 'Hello', '%ell%') (2, 'World', '%orl%') select id, haystack, needle, like(haystack, needle) from tab; For that, methods vectorVector() and vectorFixedVector() were added to MatchImpl. The existing code for const needles has an optimization where the compiled regexp is cached. The new code expects a different needle per row and consequently does not cache the regexp. 
--- src/Functions/CountSubstringsImpl.h | 6 + src/Functions/FunctionsStringSearch.h | 12 +- src/Functions/FunctionsVisitParam.h | 6 + src/Functions/HasTokenImpl.h | 6 + src/Functions/MatchImpl.h | 293 +++++++++++++++--- src/Functions/PositionImpl.h | 6 + src/Functions/Regexps.h | 20 +- ...tringsearch_with_nonconst_needle.reference | 190 ++++++++++++ ...2294_stringsearch_with_nonconst_needle.sql | 36 +++ 9 files changed, 529 insertions(+), 46 deletions(-) create mode 100644 tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.reference create mode 100644 tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.sql diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index c53d1a204e7..c8cef81333a 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -230,6 +230,12 @@ struct CountSubstringsImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + + template + static void vectorFixedVector(Args &&...) 
+ { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); + } }; } diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 27b93fc0240..68425ee496e 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -115,8 +115,6 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - using ResultType = typename Impl::ResultType; - const ColumnPtr & column_haystack = arguments[0].column; const ColumnPtr & column_needle = arguments[1].column; @@ -127,6 +125,8 @@ public: const ColumnConst * col_haystack_const = typeid_cast(&*column_haystack); const ColumnConst * col_needle_const = typeid_cast(&*column_needle); + using ResultType = typename Impl::ResultType; + if constexpr (!Impl::use_default_implementation_for_constants) { bool is_col_start_pos_const = column_start_pos == nullptr || isColumnConst(*column_start_pos); @@ -173,6 +173,14 @@ public: col_needle_const->getValue(), column_start_pos, vec_res); + else if (col_haystack_vector_fixed && col_needle_vector) + Impl::vectorFixedVector( + col_haystack_vector_fixed->getChars(), + col_haystack_vector_fixed->getN(), + col_needle_vector->getChars(), + col_needle_vector->getOffsets(), + column_start_pos, + vec_res); else if (col_haystack_vector_fixed && col_needle_const) Impl::vectorFixedConstant( col_haystack_vector_fixed->getChars(), diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index c8ba625b835..5f86923b0d1 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -147,6 +147,12 @@ struct ExtractParamImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + + template + static void vectorFixedVector(Args &&...) 
+ { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); + } }; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index a6280b4d368..9328bd99139 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -91,6 +91,12 @@ struct HasTokenImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + + template + static void vectorFixedVector(Args &&...) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); + } }; } diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 4be0f955091..5e7efd0a94a 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -21,10 +21,11 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -namespace impl { +namespace impl +{ /// Is the [I]LIKE expression reduced to finding a substring in a string? 
-inline bool likePatternIsSubstring(const String & pattern, String & res) +inline bool likePatternIsSubstring(std::string_view pattern, String & res) { if (pattern.size() < 2 || pattern.front() != '%' || pattern.back() != '%') return false; @@ -75,7 +76,7 @@ struct MatchImpl static constexpr bool supports_start_pos = false; static constexpr auto name = Name::name; - static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2};} + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {2};} using ResultType = UInt8; @@ -87,10 +88,10 @@ struct MatchImpl const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const String & needle, - const ColumnPtr & start_pos, + const ColumnPtr & start_pos_, PaddedPODArray & res) { - if (start_pos != nullptr) + if (start_pos_ != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); @@ -211,20 +212,17 @@ struct MatchImpl * so that it can match when `required_substring` occurs into the string several times, * and at the first occurrence, the regexp is not a match. */ + const size_t start_pos = (required_substring_is_prefix) ? (reinterpret_cast(pos) - str_data) : 0; + const size_t end_pos = str_size; - if (required_substring_is_prefix) - res[i] = negate - ^ regexp->getRE2()->Match( - {str_data, str_size}, - reinterpret_cast(pos) - str_data, - str_size, - re2_st::RE2::UNANCHORED, - nullptr, - 0); - else - res[i] = negate - ^ regexp->getRE2()->Match( - {str_data, str_size}, 0, str_size, re2_st::RE2::UNANCHORED, nullptr, 0); + res[i] = negate + ^ regexp->getRE2()->Match( + {str_data, str_size}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); } } else @@ -373,20 +371,17 @@ struct MatchImpl * so that it can match when `required_substring` occurs into the string several times, * and at the first occurrence, the regexp is not a match. */ + const size_t start_pos = (required_substring_is_prefix) ? 
(reinterpret_cast(pos) - str_data) : 0; + const size_t end_pos = N; - if (required_substring_is_prefix) - res[i] = negate - ^ regexp->getRE2()->Match( - {str_data, N}, - reinterpret_cast(pos) - str_data, - N, - re2_st::RE2::UNANCHORED, - nullptr, - 0); - else - res[i] = negate - ^ regexp->getRE2()->Match( - {str_data, N}, 0, N, re2_st::RE2::UNANCHORED, nullptr, 0); + res[i] = negate + ^ regexp->getRE2()->Match( + {str_data, N}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); } } else @@ -404,17 +399,243 @@ struct MatchImpl } } - template - static void vectorVector(Args &&...) + static void vectorVector( + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const ColumnString::Chars & needle_data, + const ColumnString::Offsets & needle_offset, + const ColumnPtr & start_pos_, + PaddedPODArray & res) { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); + const size_t haystack_size = haystack_offsets.size(); + + if (haystack_size != needle_offset.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function '{}' unexpectedly received a different number of haystacks and needles", name); + + if (start_pos_ != nullptr) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function '{}' doesn't support start_pos argument", name); + + if (haystack_offsets.empty()) + return; + + String required_substr; + bool is_trivial; + bool required_substring_is_prefix; /// for `anchored` execution of the regexp. 
+ + size_t prev_haystack_offset = 0; + size_t prev_needle_offset = 0; + + for (size_t i = 0; i < haystack_size; ++i) + { + const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; + const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; + + const auto * const cur_needle_data = &needle_data[prev_needle_offset]; + const size_t cur_needle_length = needle_offset[i] - prev_needle_offset - 1; + + const auto & needle = String( + reinterpret_cast(cur_needle_data), + cur_needle_length); + + if (like && impl::likePatternIsSubstring(needle, required_substr)) + { + if (required_substr.size() > cur_haystack_length) + res[i] = negate; + else + { + Searcher searcher(required_substr.data(), required_substr.size(), cur_haystack_length); + const auto * match = searcher.search(cur_haystack_data, cur_haystack_length); + res[i] = negate + ^ (match != cur_haystack_data + cur_haystack_length); + } + } + else + { + // each row is expected to contain a different like/re2 pattern + // --> bypass the regexp cache, instead construct the pattern on-the-fly + const int flags = Regexps::buildRe2Flags(); + const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); + + regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); + + if (required_substr.empty()) + { + if (!regexp.getRE2()) /// An empty regexp. Always matches. 
+ { + res[i] = 1; + } + else + { + res[i] = negate + ^ regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + 0, + cur_haystack_length, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + } + } + else + { + Searcher searcher(required_substr.data(), required_substr.size(), cur_haystack_length); + const auto * match = searcher.search(cur_haystack_data, cur_haystack_length); + + if (match == cur_haystack_data + cur_haystack_length) + { + res[i] = negate; // no match + } + else + { + if (is_trivial) + { + res[i] = !negate; // no wildcards in pattern + } + else + { + const size_t start_pos = (required_substring_is_prefix) ? (match - cur_haystack_data) : 0; + const size_t end_pos = cur_haystack_length; + + res[i] = negate + ^ regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + } + } + } + } + + prev_haystack_offset = haystack_offsets[i]; + prev_needle_offset = needle_offset[i]; + } + } + + static void vectorFixedVector( + const ColumnString::Chars & haystack, + size_t N, + const ColumnString::Chars & needle_data, + const ColumnString::Offsets & needle_offset, + const ColumnPtr & start_pos_, + PaddedPODArray & res) + { + const size_t haystack_size = haystack.size()/N; + + if (haystack_size != needle_offset.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function '{}' unexpectedly received a different number of haystacks and needles", name); + + if (start_pos_ != nullptr) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function '{}' doesn't support start_pos argument", name); + + if (haystack.empty()) + return; + + String required_substr; + bool is_trivial; + bool required_substring_is_prefix; // for `anchored` execution of the regexp. 
+ + size_t prev_haystack_offset = 0; + size_t prev_needle_offset = 0; + + for (size_t i = 0; i < haystack_size; ++i) + { + const auto * const cur_haystack_data = &haystack[prev_haystack_offset]; + const size_t cur_haystack_length = N; + + const auto * const cur_needle_data = &needle_data[prev_needle_offset]; + const size_t cur_needle_length = needle_offset[i] - prev_needle_offset - 1; + + const auto & needle = String( + reinterpret_cast(cur_needle_data), + cur_needle_length); + + if (like && impl::likePatternIsSubstring(needle, required_substr)) + { + if (required_substr.size() > cur_haystack_length) + res[i] = negate; + else + { + Searcher searcher(required_substr.data(), required_substr.size(), cur_haystack_length); + const auto * match = searcher.search(cur_haystack_data, cur_haystack_length); + res[i] = negate + ^ (match != cur_haystack_data + cur_haystack_length); + } + } + else + { + // each row is expected to contain a different like/re2 pattern + // --> bypass the regexp cache, instead construct the pattern on-the-fly + const int flags = Regexps::buildRe2Flags(); + const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); + + regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); + + if (required_substr.empty()) + { + if (!regexp.getRE2()) /// An empty regexp. Always matches. 
+ { + res[i] = 1; + } + else + { + res[i] = negate + ^ regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + 0, + cur_haystack_length, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + } + } + else + { + Searcher searcher(required_substr.data(), required_substr.size(), cur_haystack_length); + const auto * match = searcher.search(cur_haystack_data, cur_haystack_length); + + if (match == cur_haystack_data + cur_haystack_length) + { + res[i] = negate; // no match + } + else + { + if (is_trivial) + { + res[i] = !negate; // no wildcards in pattern + } + else + { + const size_t start_pos = (required_substring_is_prefix) ? (match - cur_haystack_data) : 0; + const size_t end_pos = cur_haystack_length; + + res[i] = negate + ^ regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + } + } + } + } + prev_haystack_offset += N; + prev_needle_offset = needle_offset[i]; + } } - /// Search different needles in single haystack. template static void constantVector(Args &&...) { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support non-constant needle argument", name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support search with non-constant needles in constant haystack", name); } }; diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index 07f77b652d1..82e58cdc643 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -413,6 +413,12 @@ struct PositionImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + + template + static void vectorFixedVector(Args &&...) 
+ { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); + } }; } diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 5c38583934a..dc94b75211c 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -50,6 +50,17 @@ namespace Regexps return {pattern, flags}; } + template + inline int buildRe2Flags() + { + int flags = OptimizedRegularExpression::RE_DOT_NL; + if constexpr (no_capture) + flags |= OptimizedRegularExpression::RE_NO_CAPTURE; + if constexpr (case_insensitive) + flags |= OptimizedRegularExpression::RE_CASELESS; + return flags; + } + /** Returns holder of an object from Pool. * You must hold the ownership while using the object. * In destructor, it returns the object back to the Pool for further reuse. @@ -62,14 +73,7 @@ namespace Regexps return known_regexps.get(pattern, [&pattern] { - int flags = OptimizedRegularExpression::RE_DOT_NL; - - if (no_capture) - flags |= OptimizedRegularExpression::RE_NO_CAPTURE; - - if (case_insensitive) - flags |= OptimizedRegularExpression::RE_CASELESS; - + const int flags = buildRe2Flags(); ProfileEvents::increment(ProfileEvents::RegexpCreated); return new Regexp{createRegexp(pattern, flags)}; }); diff --git a/tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.reference b/tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.reference new file mode 100644 index 00000000000..7471bcad00c --- /dev/null +++ b/tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.reference @@ -0,0 +1,190 @@ +LIKE +1 Hello 0 +2 Hello % 1 +3 Hello %% 1 +4 Hello %%% 1 +5 Hello %_% 1 +6 Hello _ 0 +7 Hello _% 1 +8 Hello %_ 1 +9 Hello H%o 1 +10 hello H%0 0 +11 hello h%o 1 +12 Hello h%o 0 +13 OHello %lhell% 0 +14 OHello %hell% 0 +15 hEllo %HEL% 0 +16 abcdef %aBc%def% 0 +17 ABCDDEF %abc%def% 0 +18 Abc\nDef %abc%def% 0 +19 abc\ntdef %abc%def% 1 +20 abct\ndef %abc%dEf% 0 +21 abc\n\ndeF %abc%def% 0 +22 abc\n\ntdef %abc%deF% 0 
+23 Abc\nt\ndef %abc%def% 0 +24 abct\n\ndef %abc%def% 1 +25 ab\ndef %Abc%def% 0 +26 aBc\nef %ABC%DEF% 0 +27 ёЁё Ё%Ё 0 +28 ощщЁё Щ%Ё 0 +29 ощЩЁё %Щ%Ё 0 +30 Щущпандер %щп%е% 1 +31 Щущпандер %щП%е% 0 +32 ощщЁё %щ% 1 +33 ощЩЁё %ё% 1 +34 Hello .* 0 +35 Hello .*ell.* 0 +36 Hello o$ 0 +37 Hello hE.*lO 0 +NOT LIKE +1 Hello 1 +2 Hello % 0 +3 Hello %% 0 +4 Hello %%% 0 +5 Hello %_% 0 +6 Hello _ 1 +7 Hello _% 0 +8 Hello %_ 0 +9 Hello H%o 0 +10 hello H%0 1 +11 hello h%o 0 +12 Hello h%o 1 +13 OHello %lhell% 1 +14 OHello %hell% 1 +15 hEllo %HEL% 1 +16 abcdef %aBc%def% 1 +17 ABCDDEF %abc%def% 1 +18 Abc\nDef %abc%def% 1 +19 abc\ntdef %abc%def% 0 +20 abct\ndef %abc%dEf% 1 +21 abc\n\ndeF %abc%def% 1 +22 abc\n\ntdef %abc%deF% 1 +23 Abc\nt\ndef %abc%def% 1 +24 abct\n\ndef %abc%def% 0 +25 ab\ndef %Abc%def% 1 +26 aBc\nef %ABC%DEF% 1 +27 ёЁё Ё%Ё 1 +28 ощщЁё Щ%Ё 1 +29 ощЩЁё %Щ%Ё 1 +30 Щущпандер %щп%е% 0 +31 Щущпандер %щП%е% 1 +32 ощщЁё %щ% 0 +33 ощЩЁё %ё% 0 +34 Hello .* 1 +35 Hello .*ell.* 1 +36 Hello o$ 1 +37 Hello hE.*lO 1 +ILIKE +1 Hello 0 +2 Hello % 1 +3 Hello %% 1 +4 Hello %%% 1 +5 Hello %_% 1 +6 Hello _ 0 +7 Hello _% 1 +8 Hello %_ 1 +9 Hello H%o 1 +10 hello H%0 0 +11 hello h%o 1 +12 Hello h%o 1 +13 OHello %lhell% 0 +14 OHello %hell% 1 +15 hEllo %HEL% 1 +16 abcdef %aBc%def% 1 +17 ABCDDEF %abc%def% 1 +18 Abc\nDef %abc%def% 1 +19 abc\ntdef %abc%def% 1 +20 abct\ndef %abc%dEf% 1 +21 abc\n\ndeF %abc%def% 1 +22 abc\n\ntdef %abc%deF% 1 +23 Abc\nt\ndef %abc%def% 1 +24 abct\n\ndef %abc%def% 1 +25 ab\ndef %Abc%def% 0 +26 aBc\nef %ABC%DEF% 0 +27 ёЁё Ё%Ё 1 +28 ощщЁё Щ%Ё 0 +29 ощЩЁё %Щ%Ё 1 +30 Щущпандер %щп%е% 1 +31 Щущпандер %щП%е% 1 +32 ощщЁё %щ% 1 +33 ощЩЁё %ё% 1 +34 Hello .* 0 +35 Hello .*ell.* 0 +36 Hello o$ 0 +37 Hello hE.*lO 0 +NOT ILIKE +1 Hello 1 +2 Hello % 0 +3 Hello %% 0 +4 Hello %%% 0 +5 Hello %_% 0 +6 Hello _ 1 +7 Hello _% 0 +8 Hello %_ 0 +9 Hello H%o 0 +10 hello H%0 1 +11 hello h%o 0 +12 Hello h%o 0 +13 OHello %lhell% 1 +14 OHello %hell% 0 +15 hEllo %HEL% 0 +16 abcdef %aBc%def% 0 +17 
ABCDDEF %abc%def% 0 +18 Abc\nDef %abc%def% 0 +19 abc\ntdef %abc%def% 0 +20 abct\ndef %abc%dEf% 0 +21 abc\n\ndeF %abc%def% 0 +22 abc\n\ntdef %abc%deF% 0 +23 Abc\nt\ndef %abc%def% 0 +24 abct\n\ndef %abc%def% 0 +25 ab\ndef %Abc%def% 1 +26 aBc\nef %ABC%DEF% 1 +27 ёЁё Ё%Ё 0 +28 ощщЁё Щ%Ё 1 +29 ощЩЁё %Щ%Ё 0 +30 Щущпандер %щп%е% 0 +31 Щущпандер %щП%е% 0 +32 ощщЁё %щ% 0 +33 ощЩЁё %ё% 0 +34 Hello .* 1 +35 Hello .*ell.* 1 +36 Hello o$ 1 +37 Hello hE.*lO 1 +MATCH +1 Hello 1 +2 Hello % 0 +3 Hello %% 0 +4 Hello %%% 0 +5 Hello %_% 0 +6 Hello _ 0 +7 Hello _% 0 +8 Hello %_ 0 +9 Hello H%o 0 +10 hello H%0 0 +11 hello h%o 0 +12 Hello h%o 0 +13 OHello %lhell% 0 +14 OHello %hell% 0 +15 hEllo %HEL% 0 +16 abcdef %aBc%def% 0 +17 ABCDDEF %abc%def% 0 +18 Abc\nDef %abc%def% 0 +19 abc\ntdef %abc%def% 0 +20 abct\ndef %abc%dEf% 0 +21 abc\n\ndeF %abc%def% 0 +22 abc\n\ntdef %abc%deF% 0 +23 Abc\nt\ndef %abc%def% 0 +24 abct\n\ndef %abc%def% 0 +25 ab\ndef %Abc%def% 0 +26 aBc\nef %ABC%DEF% 0 +27 ёЁё Ё%Ё 0 +28 ощщЁё Щ%Ё 0 +29 ощЩЁё %Щ%Ё 0 +30 Щущпандер %щп%е% 0 +31 Щущпандер %щП%е% 0 +32 ощщЁё %щ% 0 +33 ощЩЁё %ё% 0 +34 Hello .* 1 +35 Hello .*ell.* 1 +36 Hello o$ 1 +37 Hello hE.*lO 0 diff --git a/tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.sql b/tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.sql new file mode 100644 index 00000000000..3057e342733 --- /dev/null +++ b/tests/queries/0_stateless/02294_stringsearch_with_nonconst_needle.sql @@ -0,0 +1,36 @@ +drop table if exists non_const_needle; + +create table non_const_needle + (id UInt32, haystack String, needle String) + engine = MergeTree() + order by id; + +-- 1 - 33: LIKE-syntax, 34-37: re2-syntax +insert into non_const_needle values (1, 'Hello', '') (2, 'Hello', '%') (3, 'Hello', '%%') (4, 'Hello', '%%%') (5, 'Hello', '%_%') (6, 'Hello', '_') (7, 'Hello', '_%') (8, 'Hello', '%_') (9, 'Hello', 'H%o') (10, 'hello', 'H%0') (11, 'hello', 'h%o') (12, 'Hello', 'h%o') (13, 'OHello', '%lhell%') (14, 'OHello', 
'%hell%') (15, 'hEllo', '%HEL%') (16, 'abcdef', '%aBc%def%') (17, 'ABCDDEF', '%abc%def%') (18, 'Abc\nDef', '%abc%def%') (19, 'abc\ntdef', '%abc%def%') (20, 'abct\ndef', '%abc%dEf%') (21, 'abc\n\ndeF', '%abc%def%') (22, 'abc\n\ntdef', '%abc%deF%') (23, 'Abc\nt\ndef', '%abc%def%') (24, 'abct\n\ndef', '%abc%def%') (25, 'ab\ndef', '%Abc%def%') (26, 'aBc\nef', '%ABC%DEF%') (27, 'ёЁё', 'Ё%Ё') (28, 'ощщЁё', 'Щ%Ё') (29, 'ощЩЁё', '%Щ%Ё') (30, 'Щущпандер', '%щп%е%') (31, 'Щущпандер', '%щП%е%') (32, 'ощщЁё', '%щ%') (33, 'ощЩЁё', '%ё%') (34, 'Hello', '.*') (35, 'Hello', '.*ell.*') (36, 'Hello', 'o$') (37, 'Hello', 'hE.*lO'); + +select 'LIKE'; +select id, haystack, needle, like(haystack, needle) + from non_const_needle + order by id; + +select 'NOT LIKE'; +select id, haystack, needle, not like(haystack, needle) + from non_const_needle + order by id; + +select 'ILIKE'; +select id, haystack, needle, ilike(haystack, needle) + from non_const_needle + order by id; + +select 'NOT ILIKE'; +select id, haystack, needle, not ilike(haystack, needle) + from non_const_needle + order by id; + +select 'MATCH'; +select id, haystack, needle, match(haystack, needle) + from non_const_needle + order by id; + +drop table if exists non_const_needle; From 6d2fab49f424d83c6dd382e51d150095211c0802 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 23 May 2022 10:18:22 +0200 Subject: [PATCH 395/615] Add join_algorithm='parallel_hash' to stress test --- docker/test/stress/stress | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index d78de84f60d..94fdfd536a7 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -3,8 +3,6 @@ from multiprocessing import cpu_count from subprocess import Popen, call, check_output, STDOUT import os -import sys -import shutil import argparse import logging import time @@ -31,6 +29,9 @@ def get_options(i, backward_compatibility_check): if i % 5 == 1: 
client_options.append("join_use_nulls=1") + if i % 15 == 1: + client_options.append("join_algorithm='parallel_hash'") + if i % 15 == 6: client_options.append("join_algorithm='partial_merge'") From 7897a5bac7b8d187e198868767f4ddd362758f12 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 10:18:24 +0200 Subject: [PATCH 396/615] Perf test for Norm and Distance fuctions for arrays and tuples --- tests/performance/norm_distance.xml | 69 +++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/performance/norm_distance.xml diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml new file mode 100644 index 00000000000..bd9db76554c --- /dev/null +++ b/tests/performance/norm_distance.xml @@ -0,0 +1,69 @@ + + + CREATE TABLE vecs_d ( + v Array(Float32) + ) ENGINE=Memory; + + + + + + INSERT INTO vecs_d + SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), + rand(n*10+1), + rand(n*10+2), + rand(n*10+3), + rand(n*10+4), + rand(n*10+5), + rand(n*10+6), + rand(n*10+7), + rand(n*10+8), + rand(n*10+9) + ] AS v + FROM system.numbers + LIMIT 30000000 + ); + + + + + + CREATE TABLE tuples_d ( + t Tuple(Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32) + ) ENGINE=Memory; + + + + INSERT INTO tuples_d + SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_d; + + + + + select sum(dist) from (select L1Norm(t) as dist from tuples_d) + select sum(dist) from (select L2Norm(t) as dist from tuples_d) + select sum(dist) from (select LinfNorm(t) as dist from tuples_d) + + select sum(dist) from (select L1Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + select sum(dist) from (select L2Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + select sum(dist) from (select LinfDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + select sum(dist) from (select 
cosineDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + + + + select sum(dist) from (select arrayL1Norm(v) as dist from vecs_d) + select sum(dist) from (select arrayL2Norm(v) as dist from vecs_d) + select sum(dist) from (select arrayLinfNorm(v) as dist from vecs_d) + + select sum(dist) from (select arrayL1Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + select sum(dist) from (select arrayL2Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + select sum(dist) from (select arrayLinfDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + select sum(dist) from (select arrayCosineDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + + DROP TABLE vecs_d + DROP TABLE tuples_d + From e481a707aaeebecb9967d2c642191cab16cecf5b Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 23 May 2022 11:51:57 +0300 Subject: [PATCH 397/615] Fixed test --- .../02020_alter_table_modify_comment.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference index a9c37eb2ba5..d2e74fd07f0 100644 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference @@ -124,21 +124,21 @@ CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\n comment= Test table with comment change a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY 
k\nSETTINGS index_granularity = 8192\nCOMMENT \'new comment on a table\' +comment= new comment on a table remove a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192 +comment= add a comment back -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table detach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' re-attach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT 
\'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table From 98bb34f2f259681803ff56e770ca53ef3aa67eba Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 10:59:33 +0200 Subject: [PATCH 398/615] FunctionBinaryRepresentation style fixes --- ...epr.cpp => FunctionsBinaryRepresentation.cpp} | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) rename src/Functions/{FunctionsBinaryRepr.cpp => FunctionsBinaryRepresentation.cpp} (97%) diff --git a/src/Functions/FunctionsBinaryRepr.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp similarity index 97% rename from src/Functions/FunctionsBinaryRepr.cpp rename to src/Functions/FunctionsBinaryRepresentation.cpp index 4dd11a849a0..582dd1f1049 100644 --- a/src/Functions/FunctionsBinaryRepr.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -253,13 +253,13 @@ struct UnbinImpl /// Encode number or string to string with binary or hexadecimal representation template -class EncodeToBinaryRepr : public IFunction +class EncodeToBinaryRepresentation : public IFunction { public: static constexpr auto name = Impl::name; static constexpr size_t word_size = Impl::word_size; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -550,12 +550,12 @@ public: /// Decode number or string from string with binary or hexadecimal representation template -class DecodeFromBinaryRepr : public IFunction +class DecodeFromBinaryRepresentation : public IFunction { public: static constexpr auto name = Impl::name; static constexpr size_t word_size = Impl::word_size; - static FunctionPtr 
create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -623,10 +623,10 @@ public: void registerFunctionsBinaryRepr(FunctionFactory & factory) { - factory.registerFunction>(FunctionFactory::CaseInsensitive); - factory.registerFunction>(FunctionFactory::CaseInsensitive); - factory.registerFunction>(FunctionFactory::CaseInsensitive); - factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); } } From fd217da1231e9a2be4894ec0805a1ba109e48dbc Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 20 May 2022 13:02:58 +0200 Subject: [PATCH 399/615] Restore website release --- docker/docs/release/Dockerfile | 51 ++++++++++++++++++++++++++++++++++ docker/docs/release/run.sh | 10 +++++++ docker/images.json | 4 +++ 3 files changed, 65 insertions(+) create mode 100644 docker/docs/release/Dockerfile create mode 100644 docker/docs/release/run.sh diff --git a/docker/docs/release/Dockerfile b/docker/docs/release/Dockerfile new file mode 100644 index 00000000000..2ad96d71ffe --- /dev/null +++ b/docker/docs/release/Dockerfile @@ -0,0 +1,51 @@ +# rebuild in #33610 +# docker build -t clickhouse/docs-release . 
+FROM ubuntu:20.04 + +# ARG for quick switch to a given ubuntu mirror +ARG apt_archive="http://archive.ubuntu.com" +RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list + +ENV LANG=C.UTF-8 + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ + wget \ + bash \ + python \ + curl \ + python3-requests \ + sudo \ + git \ + openssl \ + python3-pip \ + software-properties-common \ + language-pack-zh* \ + chinese* \ + fonts-arphic-ukai \ + fonts-arphic-uming \ + fonts-ipafont-mincho \ + fonts-ipafont-gothic \ + fonts-unfonts-core \ + xvfb \ + nodejs \ + npm \ + openjdk-11-jdk \ + ssh-client \ + && pip --no-cache-dir install scipy \ + && apt-get autoremove --yes \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN wget 'https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb' + +RUN npm i -g purify-css + +RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv + +COPY run.sh / + +ENV REPO_PATH=/repo_path +ENV OUTPUT_PATH=/output_path + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/docs/release/run.sh b/docker/docs/release/run.sh new file mode 100644 index 00000000000..e465098c159 --- /dev/null +++ b/docker/docs/release/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd $REPO_PATH/docs/tools +mkdir venv +virtualenv -p $(which python3) venv +source venv/bin/activate +python3 -m pip install --ignore-installed -r requirements.txt +mkdir -p ~/.ssh && ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts +./release.sh 2>&1 | tee tee $OUTPUT_PATH/output.log diff --git a/docker/images.json b/docker/images.json index 9b7d44bc990..181452f17bc 100644 --- a/docker/images.json +++ b/docker/images.json @@ -146,5 +146,9 @@ "name": "clickhouse/docs-builder", "dependent": [ ] + }, + "docker/docs/release": { + "name": "clickhouse/docs-release", + "dependent": [] } } From cfe98c4aba91519a0b21bfedba3593d708055b67 Mon Sep 
17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 23 May 2022 11:43:22 +0200 Subject: [PATCH 400/615] Fix publishing of content.clickhouse.com - Publish only benchmarks and data - Minimize clickhouse/docs-release - Run it as a normal user - Speed up running by not redownload virtual env deps - Clean out docs and blog buildging - Minimize docs/tools/requirements.txt --- .github/workflows/docs_release.yml | 11 +- docker/docs/release/Dockerfile | 14 +- docker/docs/release/run.sh | 14 +- docs/tools/blog.py | 113 ------------ docs/tools/build.py | 152 ---------------- docs/tools/cmake_in_clickhouse_generator.py | 181 ------------------- docs/tools/easy_diff.py | 186 ------------------- docs/tools/github.py | 41 ----- docs/tools/nav.py | 190 -------------------- docs/tools/redirects.py | 39 ---- docs/tools/requirements.txt | 57 +++--- docs/tools/util.py | 2 +- docs/tools/webpack.config.js | 81 --------- tests/ci/docs_release.py | 6 +- 14 files changed, 48 insertions(+), 1039 deletions(-) delete mode 100644 docs/tools/blog.py delete mode 100644 docs/tools/cmake_in_clickhouse_generator.py delete mode 100755 docs/tools/easy_diff.py delete mode 100644 docs/tools/github.py delete mode 100644 docs/tools/nav.py delete mode 100644 docs/tools/webpack.config.js diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index b697fb78738..aed691844da 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -7,16 +7,17 @@ env: concurrency: group: master-release cancel-in-progress: true -on: # yamllint disable-line rule:truthy +'on': push: branches: - master paths: - - 'docs/**' - - 'website/**' - - 'benchmark/**' - - 'docker/**' - '.github/**' + - 'benchmark/**' + - 'docker/docs/release/**' + - 'docs/**' + - 'utils/list-versions/version_date.tsv' + - 'website/**' workflow_dispatch: jobs: DockerHubPushAarch64: diff --git a/docker/docs/release/Dockerfile b/docker/docs/release/Dockerfile index 2ad96d71ffe..024cf8e6cc6 
100644 --- a/docker/docs/release/Dockerfile +++ b/docker/docs/release/Dockerfile @@ -20,29 +20,23 @@ RUN apt-get update \ openssl \ python3-pip \ software-properties-common \ - language-pack-zh* \ - chinese* \ fonts-arphic-ukai \ fonts-arphic-uming \ fonts-ipafont-mincho \ fonts-ipafont-gothic \ fonts-unfonts-core \ xvfb \ - nodejs \ - npm \ - openjdk-11-jdk \ ssh-client \ - && pip --no-cache-dir install scipy \ && apt-get autoremove --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -RUN wget 'https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb' - -RUN npm i -g purify-css - RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv +# We create the most popular default 1000:1000 ubuntu user to not have ssh issues when running with UID==1000 +RUN useradd --create-home --uid 1000 --user-group ubuntu \ + && ssh-keyscan -t rsa github.com >> /etc/ssh/ssh_known_hosts + COPY run.sh / ENV REPO_PATH=/repo_path diff --git a/docker/docs/release/run.sh b/docker/docs/release/run.sh index e465098c159..e5a9f2101aa 100644 --- a/docker/docs/release/run.sh +++ b/docker/docs/release/run.sh @@ -1,10 +1,12 @@ #!/usr/bin/env bash set -euo pipefail -cd $REPO_PATH/docs/tools -mkdir venv -virtualenv -p $(which python3) venv +cd "$REPO_PATH/docs/tools" +if ! 
[ -d venv ]; then + mkdir -p venv + virtualenv -p "$(which python3)" venv + source venv/bin/activate + python3 -m pip install --ignore-installed -r requirements.txt +fi source venv/bin/activate -python3 -m pip install --ignore-installed -r requirements.txt -mkdir -p ~/.ssh && ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts -./release.sh 2>&1 | tee tee $OUTPUT_PATH/output.log +./release.sh 2>&1 | tee "$OUTPUT_PATH/output.log" diff --git a/docs/tools/blog.py b/docs/tools/blog.py deleted file mode 100644 index 9bb6beae972..00000000000 --- a/docs/tools/blog.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -import datetime -import logging -import os -import time - -import nav # monkey patches mkdocs - -import mkdocs.commands -from mkdocs import config -from mkdocs import exceptions - -import mdx_clickhouse -import redirects - -import util - - -def build_for_lang(lang, args): - logging.info(f"Building {lang} blog") - - try: - theme_cfg = { - "name": None, - "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), - "language": lang, - "direction": "ltr", - "static_templates": ["404.html"], - "extra": { - "now": int( - time.mktime(datetime.datetime.now().timetuple()) - ) # TODO better way to avoid caching - }, - } - - # the following list of languages is sorted according to - # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = {"en": "English"} - - site_names = {"en": "ClickHouse Blog"} - - assert len(site_names) == len(languages) - - site_dir = os.path.join(args.blog_output_dir, lang) - - plugins = ["macros"] - if args.htmlproofer: - plugins.append("htmlproofer") - - website_url = "https://clickhouse.com" - site_name = site_names.get(lang, site_names["en"]) - blog_nav, post_meta = nav.build_blog_nav(lang, args) - raw_config = dict( - site_name=site_name, - site_url=f"{website_url}/blog/{lang}/", - docs_dir=os.path.join(args.blog_dir, lang), - site_dir=site_dir, - strict=True, - theme=theme_cfg, - 
nav=blog_nav, - copyright="©2016–2022 ClickHouse, Inc.", - use_directory_urls=True, - repo_name="ClickHouse/ClickHouse", - repo_url="https://github.com/ClickHouse/ClickHouse/", - edit_uri=f"edit/master/website/blog/{lang}", - markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, - plugins=plugins, - extra=dict( - now=datetime.datetime.now().isoformat(), - rev=args.rev, - rev_short=args.rev_short, - rev_url=args.rev_url, - website_url=website_url, - events=args.events, - languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), - is_blog=True, - post_meta=post_meta, - today=datetime.date.today().isoformat(), - ), - ) - - cfg = config.load_config(**raw_config) - mkdocs.commands.build.build(cfg) - - redirects.build_blog_redirects(args) - - env = util.init_jinja2_env(args) - with open( - os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb" - ) as f: - rss_template_string = f.read().decode("utf-8").strip() - rss_template = env.from_string(rss_template_string) - with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f: - f.write(rss_template.render({"config": raw_config})) - - logging.info(f"Finished building {lang} blog") - - except exceptions.ConfigurationError as e: - raise SystemExit("\n" + str(e)) - - -def build_blog(args): - tasks = [] - for lang in args.blog_lang.split(","): - if lang: - tasks.append( - ( - lang, - args, - ) - ) - util.run_function_in_parallel(build_for_lang, tasks, threads=False) diff --git a/docs/tools/build.py b/docs/tools/build.py index f084a8e5c0c..3756cf66794 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -1,144 +1,17 @@ #!/usr/bin/env python3 import argparse -import datetime import logging import os import shutil import subprocess import sys -import time -import jinja2 import livereload -import markdown.util -import nav # monkey patches mkdocs - -from mkdocs import config -from mkdocs import exceptions -import mkdocs.commands.build - -import blog -import 
mdx_clickhouse import redirects -import util import website -from cmake_in_clickhouse_generator import generate_cmake_flags_files - - -class ClickHouseMarkdown(markdown.extensions.Extension): - class ClickHousePreprocessor(markdown.util.Processor): - def run(self, lines): - for line in lines: - if "" not in line: - yield line - - def extendMarkdown(self, md): - md.preprocessors.register( - self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31 - ) - - -markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown - - -def build_for_lang(lang, args): - logging.info(f"Building {lang} docs") - - try: - theme_cfg = { - "name": None, - "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), - "language": lang, - "direction": "rtl" if lang == "fa" else "ltr", - "static_templates": ["404.html"], - "extra": { - "now": int( - time.mktime(datetime.datetime.now().timetuple()) - ) # TODO better way to avoid caching - }, - } - - # the following list of languages is sorted according to - # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"} - - site_names = { - "en": "ClickHouse %s Documentation", - "zh": "ClickHouse文档 %s", - "ru": "Документация ClickHouse %s", - "ja": "ClickHouseドキュメント %s", - } - - assert len(site_names) == len(languages) - - site_dir = os.path.join(args.docs_output_dir, lang) - - plugins = ["macros"] - if args.htmlproofer: - plugins.append("htmlproofer") - - website_url = "https://clickhouse.com" - site_name = site_names.get(lang, site_names["en"]) % "" - site_name = site_name.replace(" ", " ") - - raw_config = dict( - site_name=site_name, - site_url=f"{website_url}/docs/{lang}/", - docs_dir=os.path.join(args.docs_dir, lang), - site_dir=site_dir, - strict=True, - theme=theme_cfg, - copyright="©2016–2022 ClickHouse, Inc.", - use_directory_urls=True, - repo_name="ClickHouse/ClickHouse", - repo_url="https://github.com/ClickHouse/ClickHouse/", 
- edit_uri=f"edit/master/docs/{lang}", - markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, - plugins=plugins, - extra=dict( - now=datetime.datetime.now().isoformat(), - rev=args.rev, - rev_short=args.rev_short, - rev_url=args.rev_url, - website_url=website_url, - events=args.events, - languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), - is_blog=False, - ), - ) - - raw_config["nav"] = nav.build_docs_nav(lang, args) - - cfg = config.load_config(**raw_config) - - if not args.skip_multi_page: - mkdocs.commands.build.build(cfg) - - mdx_clickhouse.PatchedMacrosPlugin.disabled = False - - logging.info(f"Finished building {lang} docs") - - except exceptions.ConfigurationError as e: - raise SystemExit("\n" + str(e)) - - -def build_docs(args): - tasks = [] - for lang in args.lang.split(","): - if lang: - tasks.append( - ( - lang, - args, - ) - ) - util.run_function_in_parallel(build_for_lang, tasks, threads=False) - redirects.build_docs_redirects(args) - def build(args): if os.path.exists(args.output_dir): @@ -147,14 +20,6 @@ def build(args): if not args.skip_website: website.build_website(args) - if not args.skip_docs: - generate_cmake_flags_files() - - build_docs(args) - - if not args.skip_blog: - blog.build_blog(args) - if not args.skip_website: website.process_benchmark_results(args) website.minify_website(args) @@ -171,20 +36,14 @@ if __name__ == "__main__": arg_parser = argparse.ArgumentParser() arg_parser.add_argument("--lang", default="en,ru,zh,ja") - arg_parser.add_argument("--blog-lang", default="en") - arg_parser.add_argument("--docs-dir", default=".") arg_parser.add_argument("--theme-dir", default=website_dir) arg_parser.add_argument("--website-dir", default=website_dir) arg_parser.add_argument("--src-dir", default=src_dir) - arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog")) arg_parser.add_argument("--output-dir", default="build") arg_parser.add_argument("--nav-limit", type=int, 
default="0") arg_parser.add_argument("--skip-multi-page", action="store_true") arg_parser.add_argument("--skip-website", action="store_true") - arg_parser.add_argument("--skip-blog", action="store_true") - arg_parser.add_argument("--skip-docs", action="store_true") arg_parser.add_argument("--htmlproofer", action="store_true") - arg_parser.add_argument("--no-docs-macros", action="store_true") arg_parser.add_argument("--livereload", type=int, default="0") arg_parser.add_argument("--verbose", action="store_true") @@ -196,11 +55,6 @@ if __name__ == "__main__": logging.getLogger("MARKDOWN").setLevel(logging.INFO) - args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs") - args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog") - - from github import get_events - args.rev = ( subprocess.check_output("git rev-parse HEAD", shell=True) .decode("utf-8") @@ -212,9 +66,6 @@ if __name__ == "__main__": .strip() ) args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" - args.events = get_events(args) - - from build import build build(args) @@ -223,9 +74,6 @@ if __name__ == "__main__": new_args = sys.executable + " " + " ".join(new_args) server = livereload.Server() - server.watch( - args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True) - ) server.watch( args.website_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True), diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py deleted file mode 100644 index 9bbc94fd206..00000000000 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ /dev/null @@ -1,181 +0,0 @@ -import re -import os -from typing import TextIO, List, Tuple, Optional, Dict - -# name, default value, description -Entity = Tuple[str, str, str] - -# https://regex101.com/r/R6iogw/12 -cmake_option_regex: str = ( - r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" -) - -ch_master_url: str = 
"https://github.com/clickhouse/clickhouse/blob/master/" - -name_str: str = '
[`{name}`](' + ch_master_url + "{path}#L{line})" -default_anchor_str: str = "[`{name}`](#{anchor})" - -comment_var_regex: str = r"\${(.+)}" -comment_var_replace: str = "`\\1`" - -table_header: str = """ -| Name | Default value | Description | Comment | -|------|---------------|-------------|---------| -""" - -# Needed to detect conditional variables (those which are defined twice) -# name -> (path, values) -entities: Dict[str, Tuple[str, str]] = {} - - -def make_anchor(t: str) -> str: - return "".join( - ["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"] - ) - - -def process_comment(comment: str) -> str: - return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE) - - -def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None: - (line, comment) = line_comment - (name, description, default) = entity - - if name in entities: - return - - if len(default) == 0: - formatted_default: str = "`OFF`" - elif default[0] == "$": - formatted_default: str = "`{}`".format(default[2:-1]) - else: - formatted_default: str = "`" + default + "`" - - formatted_name: str = name_str.format( - anchor=make_anchor(name), name=name, path=path, line=line - ) - - formatted_description: str = "".join(description.split("\n")) - - formatted_comment: str = process_comment(comment) - - formatted_entity: str = "| {} | {} | {} | {} |".format( - formatted_name, formatted_default, formatted_description, formatted_comment - ) - - entities[name] = path, formatted_entity - - -def process_file(root_path: str, file_path: str, file_name: str) -> None: - with open(os.path.join(file_path, file_name), "r") as cmake_file: - contents: str = cmake_file.read() - - def get_line_and_comment(target: str) -> Tuple[int, str]: - contents_list: List[str] = contents.split("\n") - comment: str = "" - - for n, line in enumerate(contents_list): - if "option" not in line.lower() or target not in line: - continue - - for maybe_comment_line in contents_list[n - 
1 :: -1]: - if not re.match("\s*#\s*", maybe_comment_line): - break - - comment = re.sub("\s*#\s*", "", maybe_comment_line) + " " + comment - - # line numbering starts with 1 - return n + 1, comment - - matches: Optional[List[Entity]] = re.findall( - cmake_option_regex, contents, re.MULTILINE - ) - - file_rel_path_with_name: str = os.path.join( - file_path[len(root_path) :], file_name - ) - if file_rel_path_with_name.startswith("/"): - file_rel_path_with_name = file_rel_path_with_name[1:] - - if matches: - for entity in matches: - build_entity( - file_rel_path_with_name, entity, get_line_and_comment(entity[0]) - ) - - -def process_folder(root_path: str, name: str) -> None: - for root, _, files in os.walk(os.path.join(root_path, name)): - for f in files: - if f == "CMakeLists.txt" or ".cmake" in f: - process_file(root_path, root, f) - - -def generate_cmake_flags_files() -> None: - root_path: str = os.path.join(os.path.dirname(__file__), "..", "..") - - output_file_name: str = os.path.join( - root_path, "docs/en/development/cmake-in-clickhouse.md" - ) - header_file_name: str = os.path.join( - root_path, "docs/_includes/cmake_in_clickhouse_header.md" - ) - footer_file_name: str = os.path.join( - root_path, "docs/_includes/cmake_in_clickhouse_footer.md" - ) - - process_file(root_path, root_path, "CMakeLists.txt") - process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt") - - process_folder(root_path, "base") - process_folder(root_path, "cmake") - process_folder(root_path, "src") - - with open(output_file_name, "w") as f: - with open(header_file_name, "r") as header: - f.write(header.read()) - - sorted_keys: List[str] = sorted(entities.keys()) - ignored_keys: List[str] = [] - - f.write("### ClickHouse modes\n" + table_header) - - for k in sorted_keys: - if k.startswith("ENABLE_CLICKHOUSE_"): - f.write(entities[k][1] + "\n") - ignored_keys.append(k) - - f.write( - "\n### External libraries\nNote that ClickHouse uses forks of these libraries, see 
https://github.com/ClickHouse-Extras.\n" - + table_header - ) - - for k in sorted_keys: - if k.startswith("ENABLE_") and ".cmake" in entities[k][0]: - f.write(entities[k][1] + "\n") - ignored_keys.append(k) - - f.write("\n\n### Other flags\n" + table_header) - - for k in sorted(set(sorted_keys).difference(set(ignored_keys))): - f.write(entities[k][1] + "\n") - - with open(footer_file_name, "r") as footer: - f.write(footer.read()) - - other_languages = [ - "docs/ja/development/cmake-in-clickhouse.md", - "docs/zh/development/cmake-in-clickhouse.md", - "docs/ru/development/cmake-in-clickhouse.md", - ] - - for lang in other_languages: - other_file_name = os.path.join(root_path, lang) - if os.path.exists(other_file_name): - os.unlink(other_file_name) - os.symlink(output_file_name, other_file_name) - - -if __name__ == "__main__": - generate_cmake_flags_files() diff --git a/docs/tools/easy_diff.py b/docs/tools/easy_diff.py deleted file mode 100755 index 14e3ca91776..00000000000 --- a/docs/tools/easy_diff.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import os, sys -import argparse -import subprocess -import contextlib -from git import cmd -from tempfile import NamedTemporaryFile - -SCRIPT_DESCRIPTION = """ - usage: ./easy_diff.py language/document path - - Show the difference between a language document and an English document. - - This script is based on the assumption that documents in other languages are fully synchronized with the en document at a commit. 
- - For example: - Execute: - ./easy_diff.py --no-pager zh/data_types - Output: - Need translate document:~/ClickHouse/docs/en/data_types/uuid.md - Need link document:~/ClickHouse/docs/en/data_types/decimal.md to ~/ClickHouse/docs/zh/data_types/decimal.md - diff --git a/docs/en/data_types/domains/ipv6.md b/docs/en/data_types/domains/ipv6.md - index 1bfbe3400b..e2abaff017 100644 - --- a/docs/en/data_types/domains/ipv6.md - +++ b/docs/en/data_types/domains/ipv6.md - @@ -4,13 +4,13 @@ - - ### Basic Usage - - -``` sql - +```sql - CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY url; - - DESCRIBE TABLE hits; - ``` - - -``` - +```text - ┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐ - │ url │ String │ │ │ │ │ - │ from │ IPv6 │ │ │ │ │ - @@ -19,19 +19,19 @@ DESCRIBE TABLE hits; - - OR you can use `IPv6` domain as a key: - - -``` sql - +```sql - CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; - ... MORE - - OPTIONS: - -h, --help show this help message and exit - --no-pager use stdout as difference result output -""" - -SCRIPT_PATH = os.path.abspath(__file__) -CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..") -SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME) - -SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False) -SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None) -SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False) -SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False) - - -def execute(commands): - return SCRIPT_COMMAND_EXECUTOR.execute(commands) - - -def get_hash(file_name): - return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name]) - - -def diff_file(reference_file, working_file, out): - if not os.path.exists(reference_file): - raise RuntimeError( - "reference file [" + os.path.abspath(reference_file) + "] is not exists." 
- ) - - if os.path.islink(working_file): - out.writelines(["Need translate document:" + os.path.abspath(reference_file)]) - elif not os.path.exists(working_file): - out.writelines( - [ - "Need link document " - + os.path.abspath(reference_file) - + " to " - + os.path.abspath(working_file) - ] - ) - elif get_hash(working_file) != get_hash(reference_file): - out.writelines( - [ - ( - execute( - [ - "git", - "diff", - get_hash(working_file).strip('"'), - reference_file, - ] - ).encode("utf-8") - ) - ] - ) - - return 0 - - -def diff_directory(reference_directory, working_directory, out): - if not os.path.isdir(reference_directory): - return diff_file(reference_directory, working_directory, out) - - for list_item in os.listdir(reference_directory): - working_item = os.path.join(working_directory, list_item) - reference_item = os.path.join(reference_directory, list_item) - if ( - diff_file(reference_item, working_item, out) - if os.path.isfile(reference_item) - else diff_directory(reference_item, working_item, out) != 0 - ): - return 1 - - return 0 - - -def find_language_doc(custom_document, other_language="en", children=[]): - if len(custom_document) == 0: - raise RuntimeError( - "The " - + os.path.join(custom_document, *children) - + " is not in docs directory." 
- ) - - if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document): - return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:]) - children.insert(0, os.path.split(custom_document)[1]) - return find_language_doc( - os.path.split(custom_document)[0], other_language, children - ) - - -class ToPager: - def __init__(self, temp_named_file): - self.temp_named_file = temp_named_file - - def writelines(self, lines): - self.temp_named_file.writelines(lines) - - def close(self): - self.temp_named_file.flush() - git_pager = execute(["git", "var", "GIT_PAGER"]) - subprocess.check_call([git_pager, self.temp_named_file.name]) - self.temp_named_file.close() - - -class ToStdOut: - def writelines(self, lines): - self.system_stdout_stream.writelines(lines) - - def close(self): - self.system_stdout_stream.flush() - - def __init__(self, system_stdout_stream): - self.system_stdout_stream = system_stdout_stream - - -if __name__ == "__main__": - arguments = SCRIPT_COMMAND_PARSER.parse_args() - if arguments.help or not arguments.path: - sys.stdout.write(SCRIPT_DESCRIPTION) - sys.exit(0) - - working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path) - with contextlib.closing( - ToStdOut(sys.stdout) - if arguments.no_pager - else ToPager(NamedTemporaryFile("r+")) - ) as writer: - exit( - diff_directory( - find_language_doc(working_language), working_language, writer - ) - ) diff --git a/docs/tools/github.py b/docs/tools/github.py deleted file mode 100644 index 3a6f155e25d..00000000000 --- a/docs/tools/github.py +++ /dev/null @@ -1,41 +0,0 @@ -import collections -import copy -import io -import logging -import os -import random -import sys -import tarfile -import time - -import requests - -import util - - -def get_events(args): - events = [] - skip = True - with open(os.path.join(args.docs_dir, "..", "README.md")) as f: - for line in f: - if skip: - if "Upcoming Events" in line: - skip = False - else: - if not line: - continue - 
line = line.strip().split("](") - if len(line) == 2: - tail = line[1].split(") ") - events.append( - { - "signup_link": tail[0], - "event_name": line[0].replace("* [", ""), - "event_date": tail[1].replace("on ", "").replace(".", ""), - } - ) - return events - - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) diff --git a/docs/tools/nav.py b/docs/tools/nav.py deleted file mode 100644 index e3df85bbe4e..00000000000 --- a/docs/tools/nav.py +++ /dev/null @@ -1,190 +0,0 @@ -import collections -import datetime -import hashlib -import logging -import os - -import mkdocs.structure.nav - -import util - - -def find_first_header(content): - for line in content.split("\n"): - if line.startswith("#"): - no_hash = line.lstrip("#") - return no_hash.split("{", 1)[0].strip() - - -def build_nav_entry(root, args): - if root.endswith("images"): - return None, None, None - result_items = [] - index_meta, index_content = util.read_md_file(os.path.join(root, "index.md")) - current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title")) - current_title = current_title or index_meta.get( - "title", find_first_header(index_content) - ) - for filename in os.listdir(root): - path = os.path.join(root, filename) - if os.path.isdir(path): - prio, title, payload = build_nav_entry(path, args) - if title and payload: - result_items.append((prio, title, payload)) - elif filename.endswith(".md"): - path = os.path.join(root, filename) - - meta = "" - content = "" - - try: - meta, content = util.read_md_file(path) - except: - print("Error in file: {}".format(path)) - raise - - path = path.split("/", 2)[-1] - title = meta.get("toc_title", find_first_header(content)) - if title: - title = title.strip().rstrip(".") - else: - title = meta.get("toc_folder_title", "hidden") - prio = meta.get("toc_priority", 9999) - logging.debug(f"Nav entry: {prio}, {title}, {path}") - if meta.get("toc_hidden") or not content.strip(): - title = "hidden" - if title == 
"hidden": - title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest() - if args.nav_limit and len(result_items) >= args.nav_limit: - break - result_items.append((prio, title, path)) - result_items = sorted(result_items, key=lambda x: (x[0], x[1])) - result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) - if index_meta.get("toc_hidden_folder"): - current_title += "|hidden-folder" - return index_meta.get("toc_priority", 10000), current_title, result - - -def build_docs_nav(lang, args): - docs_dir = os.path.join(args.docs_dir, lang) - _, _, nav = build_nav_entry(docs_dir, args) - result = [] - index_key = None - for key, value in list(nav.items()): - if key and value: - if value == "index.md": - index_key = key - continue - result.append({key: value}) - if args.nav_limit and len(result) >= args.nav_limit: - break - if index_key: - key = list(result[0].keys())[0] - result[0][key][index_key] = "index.md" - result[0][key].move_to_end(index_key, last=False) - return result - - -def build_blog_nav(lang, args): - blog_dir = os.path.join(args.blog_dir, lang) - years = sorted(os.listdir(blog_dir), reverse=True) - result_nav = [{"hidden": "index.md"}] - post_meta = collections.OrderedDict() - for year in years: - year_dir = os.path.join(blog_dir, year) - if not os.path.isdir(year_dir): - continue - result_nav.append({year: collections.OrderedDict()}) - posts = [] - post_meta_items = [] - for post in os.listdir(year_dir): - post_path = os.path.join(year_dir, post) - if not post.endswith(".md"): - raise RuntimeError( - f"Unexpected non-md file in posts folder: {post_path}" - ) - meta, _ = util.read_md_file(post_path) - post_date = meta["date"] - post_title = meta["title"] - if datetime.date.fromisoformat(post_date) > datetime.date.today(): - continue - posts.append( - ( - post_date, - post_title, - os.path.join(year, post), - ) - ) - if post_title in post_meta: - raise RuntimeError(f"Duplicate post title: {post_title}") - if not 
post_date.startswith(f"{year}-"): - raise RuntimeError( - f"Post date {post_date} doesn't match the folder year {year}: {post_title}" - ) - post_url_part = post.replace(".md", "") - post_meta_items.append( - ( - post_date, - { - "date": post_date, - "title": post_title, - "image": meta.get("image"), - "url": f"/blog/{lang}/{year}/{post_url_part}/", - }, - ) - ) - for _, title, path in sorted(posts, reverse=True): - result_nav[-1][year][title] = path - for _, post_meta_item in sorted( - post_meta_items, reverse=True, key=lambda item: item[0] - ): - post_meta[post_meta_item["title"]] = post_meta_item - return result_nav, post_meta - - -def _custom_get_navigation(files, config): - nav_config = config["nav"] or mkdocs.structure.nav.nest_paths( - f.src_path for f in files.documentation_pages() - ) - items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) - if not isinstance(items, list): - items = [items] - - pages = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Page) - - mkdocs.structure.nav._add_previous_and_next_links(pages) - mkdocs.structure.nav._add_parent_links(items) - - missing_from_config = [ - file for file in files.documentation_pages() if file.page is None - ] - if missing_from_config: - files._files = [ - file for file in files._files if file not in missing_from_config - ] - - links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link) - for link in links: - scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse( - link.url - ) - if scheme or netloc: - mkdocs.structure.nav.log.debug( - "An external link to '{}' is included in " - "the 'nav' configuration.".format(link.url) - ) - elif link.url.startswith("/"): - mkdocs.structure.nav.log.debug( - "An absolute path to '{}' is included in the 'nav' configuration, " - "which presumably points to an external resource.".format(link.url) - ) - else: - msg = ( - "A relative path to '{}' is included in the 'nav' configuration, " - "which is 
not found in the documentation files".format(link.url) - ) - mkdocs.structure.nav.log.warning(msg) - return mkdocs.structure.nav.Navigation(items, pages) - - -mkdocs.structure.nav.get_navigation = _custom_get_navigation diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 5d222376683..1b5490a040f 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -27,45 +27,6 @@ def write_redirect_html(out_path, to_url): ) -def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): - out_path = os.path.join( - output_dir, - lang, - from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"), - ) - target_path = to_path.replace("/index.md", "/").replace(".md", "/") - - if target_path[0:7] != "http://" and target_path[0:8] != "https://": - to_url = f"/{base_prefix}/{lang}/{target_path}" - else: - to_url = target_path - - to_url = to_url.strip() - write_redirect_html(out_path, to_url) - - -def build_docs_redirects(args): - with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f: - for line in f: - for lang in args.lang.split(","): - from_path, to_path = line.split(" ", 1) - build_redirect_html( - args, "docs", lang, args.docs_output_dir, from_path, to_path - ) - - -def build_blog_redirects(args): - for lang in args.blog_lang.split(","): - redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt") - if os.path.exists(redirects_path): - with open(redirects_path, "r") as f: - for line in f: - from_path, to_path = line.split(" ", 1) - build_redirect_html( - args, "blog", lang, args.blog_output_dir, from_path, to_path - ) - - def build_static_redirects(args): for static_redirect in [ ("benchmark.html", "/benchmark/dbms/"), diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index dd641c13629..b6f2d4549e5 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,39 +1,32 @@ Babel==2.9.1 -backports-abc==0.5 -backports.functools-lru-cache==1.6.1 
-beautifulsoup4==4.9.1 -certifi==2020.4.5.2 -chardet==3.0.4 -click==7.1.2 -closure==20191111 -cssmin==0.2.0 -future==0.18.2 -htmlmin==0.1.12 -idna==2.10 Jinja2==3.0.3 -jinja2-highlight==0.6.1 -jsmin==3.0.0 -livereload==2.6.3 Markdown==3.3.2 -MarkupSafe==2.1.0 -mkdocs==1.3.0 -mkdocs-htmlproofer-plugin==0.0.3 -mkdocs-macros-plugin==0.4.20 -nltk==3.7 -nose==1.3.7 -protobuf==3.14.0 -numpy==1.21.2 -pymdown-extensions==8.0 -python-slugify==4.0.1 +MarkupSafe==2.1.1 +MarkupSafe==2.1.1 PyYAML==6.0 -repackage==0.7.3 -requests==2.25.1 -singledispatch==3.4.0.3 +Pygments>=2.12.0 +beautifulsoup4==4.9.1 +click==7.1.2 +ghp_import==2.1.0 +importlib_metadata==4.11.4 +jinja2-highlight==0.6.1 +livereload==2.6.3 +mergedeep==1.3.4 +mkdocs-macros-plugin==0.4.20 +mkdocs-macros-test==0.1.0 +mkdocs-material==8.2.15 +mkdocs==1.3.0 +mkdocs_material_extensions==1.0.3 +packaging==21.3 +pygments==2.12.0 +pymdown_extensions==9.4 +pyparsing==3.0.9 +python-slugify==4.0.1 +python_dateutil==2.8.2 +pytz==2022.1 six==1.15.0 -soupsieve==2.0.1 +soupsieve==2.3.2 termcolor==1.1.0 +text_unidecode==1.3 tornado==6.1 -Unidecode==1.1.1 -urllib3>=1.26.8 -Pygments>=2.11.2 - +zipp==3.8.0 diff --git a/docs/tools/util.py b/docs/tools/util.py index ec670725122..a5ebb1b11b2 100644 --- a/docs/tools/util.py +++ b/docs/tools/util.py @@ -124,7 +124,7 @@ def init_jinja2_env(args): env = jinja2.Environment( loader=jinja2.FileSystemLoader( - [args.website_dir, os.path.join(args.docs_dir, "_includes")] + [args.website_dir, os.path.join(args.src_dir, "docs", "_includes")] ), extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], ) diff --git a/docs/tools/webpack.config.js b/docs/tools/webpack.config.js deleted file mode 100644 index e0dea964101..00000000000 --- a/docs/tools/webpack.config.js +++ /dev/null @@ -1,81 +0,0 @@ -const path = require('path'); -const jsPath = path.resolve(__dirname, '../../website/src/js'); -const scssPath = path.resolve(__dirname, '../../website/src/scss'); - 
-console.log(path.resolve(__dirname, 'node_modules/bootstrap', require('bootstrap/package.json').sass)); - -module.exports = { - - mode: ('development' === process.env.NODE_ENV) && 'development' || 'production', - - ...(('development' === process.env.NODE_ENV) && { - watch: true, - }), - - entry: [ - path.resolve(scssPath, 'bootstrap.scss'), - path.resolve(scssPath, 'main.scss'), - path.resolve(jsPath, 'main.js'), - ], - - output: { - path: path.resolve(__dirname, '../../website'), - filename: 'js/main.js', - }, - - resolve: { - alias: { - bootstrap: path.resolve(__dirname, 'node_modules/bootstrap', require('bootstrap/package.json').sass), - }, - }, - - module: { - rules: [{ - test: /\.js$/, - exclude: /(node_modules)/, - use: [{ - loader: 'babel-loader', - options: { - presets: ['@babel/preset-env'], - }, - }], - }, { - test: /\.scss$/, - use: [{ - loader: 'file-loader', - options: { - sourceMap: true, - outputPath: (url, entryPath, context) => { - if (0 === entryPath.indexOf(scssPath)) { - const outputFile = entryPath.slice(entryPath.lastIndexOf('/') + 1, -5) - const outputPath = entryPath.slice(0, entryPath.lastIndexOf('/')).slice(scssPath.length + 1) - return `./css/${outputPath}/${outputFile}.css` - } - return `./css/${url}` - }, - }, - }, { - loader: 'postcss-loader', - options: { - options: {}, - plugins: () => ([ - require('autoprefixer'), - ('production' === process.env.NODE_ENV) && require('cssnano'), - ].filter(plugin => plugin)), - } - }, { - loader: 'sass-loader', - options: { - implementation: require('sass'), - implementation: require('sass'), - sourceMap: ('development' === process.env.NODE_ENV), - sassOptions: { - importer: require('node-sass-glob-importer')(), - precision: 10, - }, - }, - }], - }], - }, - -}; diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index b6d47326f9b..d404e79c312 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -42,8 +42,10 @@ if __name__ == "__main__": token = CLOUDFLARE_TOKEN cmd = 
( - "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " - f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}" + "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent " + f"-e SSH_AUTH_SOCK=/ssh-agent -e CLOUDFLARE_TOKEN={token} " + f"-e EXTRA_BUILD_ARGS='--verbose' --volume={repo_path}:/repo_path" + f" --volume={test_output}:/output_path {docker_image}" ) run_log_path = os.path.join(test_output, "runlog.log") From c268296fc6735ee7b6c03799e0dd565bcfd7baaf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 23 May 2022 09:53:46 +0000 Subject: [PATCH 401/615] Fix single node force recovery and add tests --- src/Coordination/KeeperServer.cpp | 36 ++-- .../__init__.py | 1 + .../configs/enable_keeper1.xml | 33 ++++ .../configs/enable_keeper1_solo.xml | 24 +++ .../configs/enable_keeper2.xml | 33 ++++ .../configs/enable_keeper3.xml | 33 ++++ .../configs/use_keeper.xml | 16 ++ .../test.py | 157 ++++++++++++++++++ 8 files changed, 322 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_keeper_force_recovery_single_node/__init__.py create mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml create mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml create mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml create mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml create mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml create mode 100644 tests/integration/test_keeper_force_recovery_single_node/test.py diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 1f089ba2cb7..ec2cb0b2b84 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -466,20 
+466,23 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ { if (is_recovering) { + const auto finish_recovering = [&] + { + auto new_params = raft_instance->get_current_params(); + new_params.custom_commit_quorum_size_ = 0; + new_params.custom_election_quorum_size_ = 0; + raft_instance->update_params(new_params); + + LOG_INFO(log, "Recovery is done. You can continue using cluster normally."); + is_recovering = false; + }; + switch (type) { case nuraft::cb_func::HeartBeat: { if (raft_instance->isClusterHealthy()) - { - auto new_params = raft_instance->get_current_params(); - new_params.custom_commit_quorum_size_ = 0; - new_params.custom_election_quorum_size_ = 0; - raft_instance->update_params(new_params); - - LOG_INFO(log, "Recovery is done. You can continue using cluster normally."); - is_recovering = false; - } + finish_recovering(); break; } case nuraft::cb_func::NewConfig: @@ -490,8 +493,19 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ // Because we manually set the config to commit // we need to call the reconfigure also uint64_t log_idx = *static_cast(param->ctx); - if (log_idx == state_manager->load_config()->get_log_idx()) - raft_instance->forceReconfigure(state_manager->load_config()); + + auto config = state_manager->load_config(); + if (log_idx == config->get_log_idx()) + { + raft_instance->forceReconfigure(config); + + // Single node cluster doesn't need to wait for any other nodes + // so we can finish recovering immediatelly after applying + // new configuration + if (config->get_servers().size() == 1) + finish_recovering(); + } + break; } case nuraft::cb_func::ProcessReq: diff --git a/tests/integration/test_keeper_force_recovery_single_node/__init__.py b/tests/integration/test_keeper_force_recovery_single_node/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_force_recovery_single_node/__init__.py @@ -0,0 +1 @@ 
+#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml new file mode 100644 index 00000000000..441c1bc185d --- /dev/null +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml @@ -0,0 +1,33 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + + + 3 + node3 + 9234 + + + + diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml new file mode 100644 index 00000000000..f0cb887b062 --- /dev/null +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml @@ -0,0 +1,24 @@ + + + 1 + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 9234 + + + + diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml new file mode 100644 index 00000000000..e2e2c1fd7db --- /dev/null +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml @@ -0,0 +1,33 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + + + 3 + node3 + 9234 + + + + diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml new file mode 100644 index 00000000000..e2ac0400d88 --- /dev/null +++ 
b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml @@ -0,0 +1,33 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + + + 3 + node3 + 9234 + + + + diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml new file mode 100644 index 00000000000..384e984f210 --- /dev/null +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml @@ -0,0 +1,16 @@ + + + + node1 + 9181 + + + node2 + 9181 + + + node3 + 9181 + + + diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py new file mode 100644 index 00000000000..f833cf96197 --- /dev/null +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -0,0 +1,157 @@ +import os +import pytest +import socket +from helpers.cluster import ClickHouseCluster +import time + + +from kazoo.client import KazooClient + +CLUSTER_SIZE = 3 + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + + +def get_nodes(): + nodes = [] + for i in range(CLUSTER_SIZE): + nodes.append( + cluster.add_instance( + f"node{i+1}", + main_configs=[ + f"configs/enable_keeper{i+1}.xml", + f"configs/use_keeper.xml" + ], + stay_alive=True, + ) + ) + + return nodes + + +nodes = get_nodes() + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def get_keeper_socket(node_name): + hosts = 
cluster.get_instance_ip(node_name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, 9181)) + return client + + +def send_4lw_cmd(node_name, cmd="ruok"): + client = None + try: + client = get_keeper_socket(node_name) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +def wait_until_connected(node_name): + while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_nodes(nodes): + for node in nodes: + wait_until_connected(node.name) + + +def wait_and_assert_data(zk, path, data): + while zk.exists(path) is None: + time.sleep(0.1) + assert zk.get(path)[0] == data.encode() + + +def close_zk(zk): + zk.stop() + zk.close() + + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +def test_cluster_recovery(started_cluster): + node_zks = [] + try: + wait_nodes(nodes) + + node_zks = [get_fake_zk(node.name) for node in nodes] + + data_in_cluster = [] + + def add_data(zk, path, data): + zk.create(path, data.encode()) + data_in_cluster.append((path, data)) + + def assert_all_data(zk): + for path, data in data_in_cluster: + wait_and_assert_data(zk, path, data) + + for i, zk in enumerate(node_zks): + add_data(zk, f"/test_force_recovery_node{i+1}", f"somedata{i+1}") + + for zk in node_zks: + assert_all_data(zk) + + nodes[0].stop_clickhouse() + + add_data(node_zks[1], "/test_force_recovery_extra", "somedataextra") + + for node_zk in node_zks[2:CLUSTER_SIZE]: + wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") + + nodes[0].start_clickhouse() + wait_until_connected(nodes[0].name) + + node_zks[0] = get_fake_zk(nodes[0].name) + wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") + + # stop all nodes + for node_zk in node_zks: + close_zk(node_zk) + node_zks = [] + + for node in nodes: + node.stop_clickhouse() + + 
nodes[0].copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper1_solo.xml"), + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + ) + + nodes[0].start_clickhouse() + wait_until_connected(nodes[0].name) + + assert_all_data(get_fake_zk(nodes[0].name)) + finally: + try: + for zk_conn in node_zks: + close_zk(zk_conn) + except: + pass From 3a22d8ef2c19dc7901eabc8f6a4d1a2e2f52bb15 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 12:09:44 +0200 Subject: [PATCH 402/615] CompressedWriteBuffer added comment --- src/Compression/CompressedWriteBuffer.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 93f163dc1af..6c1dbd9e00c 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -22,15 +22,22 @@ void CompressedWriteBuffer::nextImpl() if (!offset()) return; - UInt32 compressed_size = 0; size_t decompressed_size = offset(); UInt32 compressed_reserve_size = codec->getCompressedReserveSize(decompressed_size); - if (out.available() > compressed_reserve_size + CHECKSUM_SIZE) + /** During compression we need buffer with capacity >= compressed_reserve_size + CHECKSUM_SIZE. + * + * If output buffer has necessary capacity, we can compress data directly in output buffer. + * Then we can write checksum at the output buffer begin. + * + * If output buffer does not have necessary capacity. Compress data in temporary buffer. + * Then we can write checksum and temporary buffer in output buffer. 
+ */ + if (out.available() >= compressed_reserve_size + CHECKSUM_SIZE) { char * out_checksum_ptr = out.position(); char * out_compressed_ptr = out.position() + CHECKSUM_SIZE; - compressed_size = codec->compress(working_buffer.begin(), decompressed_size, out_compressed_ptr); + UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, out_compressed_ptr); CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size); memcpy(out_checksum_ptr, reinterpret_cast(&checksum), CHECKSUM_SIZE); @@ -39,7 +46,7 @@ void CompressedWriteBuffer::nextImpl() else { compressed_buffer.resize(compressed_reserve_size); - compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); + UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); out.write(reinterpret_cast(&checksum), CHECKSUM_SIZE); From 4067d646df5b5cdd57c987bacae4475c8a4f707a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 23 May 2022 12:14:55 +0200 Subject: [PATCH 403/615] Delete redundant file --- src/Disks/SingleDiskVolume.cpp | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 src/Disks/SingleDiskVolume.cpp diff --git a/src/Disks/SingleDiskVolume.cpp b/src/Disks/SingleDiskVolume.cpp deleted file mode 100644 index 47140407026..00000000000 --- a/src/Disks/SingleDiskVolume.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include - -namespace DB -{ - -} From 78782de8870a96d4007f70949730049e295eb572 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 12:19:14 +0200 Subject: [PATCH 404/615] Functions normalizeUTF8 logical error fix --- src/Functions/normalizeString.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/normalizeString.cpp 
b/src/Functions/normalizeString.cpp index 8989e56d0d3..9b1d1292d2c 100644 --- a/src/Functions/normalizeString.cpp +++ b/src/Functions/normalizeString.cpp @@ -106,7 +106,7 @@ struct NormalizeUTF8Impl size_t from_size = offsets[i] - current_from_offset - 1; from_uchars.resize(from_size + 1); - int32_t from_code_points; + int32_t from_code_points = 0; u_strFromUTF8( from_uchars.data(), from_uchars.size(), @@ -133,7 +133,7 @@ struct NormalizeUTF8Impl if (res_data.size() < max_to_size) res_data.resize(max_to_size); - int32_t to_size; + int32_t to_size = 0; u_strToUTF8( reinterpret_cast(&res_data[current_to_offset]), res_data.size() - current_to_offset, @@ -151,6 +151,8 @@ struct NormalizeUTF8Impl current_from_offset = offsets[i]; } + + res_data.resize(current_to_offset); } [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) From e44fec0127751d49bf77c81ed3233a63d3398350 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 12:21:59 +0200 Subject: [PATCH 405/615] Added tests --- .../02311_normalize_utf8_constant.reference | 1 + .../0_stateless/02311_normalize_utf8_constant.sql | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/02311_normalize_utf8_constant.reference create mode 100644 tests/queries/0_stateless/02311_normalize_utf8_constant.sql diff --git a/tests/queries/0_stateless/02311_normalize_utf8_constant.reference b/tests/queries/0_stateless/02311_normalize_utf8_constant.reference new file mode 100644 index 00000000000..efd3caf8a45 --- /dev/null +++ b/tests/queries/0_stateless/02311_normalize_utf8_constant.reference @@ -0,0 +1 @@ +â â â â â C3A2 C3A2 61CC82 C3A2 61CC82 diff --git a/tests/queries/0_stateless/02311_normalize_utf8_constant.sql b/tests/queries/0_stateless/02311_normalize_utf8_constant.sql new file mode 100644 index 00000000000..b7c9693b6b8 --- /dev/null +++ b/tests/queries/0_stateless/02311_normalize_utf8_constant.sql @@ -0,0 +1,11 @@ +SELECT + 'â' AS 
s, + normalizeUTF8NFC(s) s1, + normalizeUTF8NFD(s) s2, + normalizeUTF8NFKC(s) s3, + normalizeUTF8NFKD(s) s4, + hex(s), + hex(s1), + hex(s2), + hex(s3), + hex(s4); From 100afa8bcfd843277232293c29e8331dc7f4653c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 12 May 2022 12:36:32 +0200 Subject: [PATCH 406/615] Dictionary getDescendants performance improvement --- src/Dictionaries/FlatDictionary.cpp | 22 +- src/Dictionaries/FlatDictionary.h | 5 +- src/Dictionaries/HashedArrayDictionary.cpp | 32 ++- src/Dictionaries/HashedArrayDictionary.h | 5 +- src/Dictionaries/HashedDictionary.cpp | 33 ++- src/Dictionaries/HashedDictionary.h | 5 +- .../HierarchyDictionariesUtils.cpp | 16 ++ src/Dictionaries/HierarchyDictionariesUtils.h | 109 +++++---- src/Dictionaries/IDictionary.h | 13 +- .../gtest_hierarchy_dictionaries_utils.cpp | 12 +- .../FunctionsExternalDictionaries.cpp | 4 +- src/Functions/FunctionsExternalDictionaries.h | 220 +++++++++++------- 12 files changed, 307 insertions(+), 169 deletions(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 2fe9f09741b..ff13cbcdff9 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -244,14 +244,8 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( return result; } -ColumnPtr FlatDictionary::getDescendants( - ColumnPtr key_column, - const DataTypePtr &, - size_t level) const +DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchyParentToChildIndex() const { - PaddedPODArray keys_backup; - const auto & keys = getColumnVectorData(this, key_column, keys_backup); - size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -266,8 +260,20 @@ ColumnPtr FlatDictionary::getDescendants( parent_to_child[parent_key].emplace_back(static_cast(i)); } + return 
std::make_shared(std::move(parent_to_child)); +} + +ColumnPtr FlatDictionary::getDescendants( + ColumnPtr key_column, + const DataTypePtr &, + size_t level, + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index) const +{ + PaddedPODArray keys_backup; + const auto & keys = getColumnVectorData(this, key_column, keys_backup); + size_t keys_found; - auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found); + auto result = getKeysDescendantsArray(keys, *parent_to_child_index, level, keys_found); query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index f342c38802d..dc97ec2a79e 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -92,10 +92,13 @@ public: ColumnPtr in_key_column, const DataTypePtr & key_type) const override; + DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const override; + ColumnPtr getDescendants( ColumnPtr key_column, const DataTypePtr & key_type, - size_t level) const override; + size_t level, + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index) const override; Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 5a2586147b8..3fc06ba5960 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -282,18 +282,11 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( } template -ColumnPtr HashedArrayDictionary::getDescendants( - ColumnPtr key_column [[maybe_unused]], - const DataTypePtr &, - size_t level [[maybe_unused]]) const +DictionaryHierarchyParentToChildIndexPtr HashedArrayDictionary::getHierarchyParentToChildIndex() const { if constexpr (dictionary_key_type == 
DictionaryKeyType::Simple) { - PaddedPODArray keys_backup; - const auto & keys = getColumnVectorData(this, key_column, keys_backup); - size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; - const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); @@ -306,6 +299,7 @@ ColumnPtr HashedArrayDictionary::getDescendants( index_to_key[value] = key; HashMap> parent_to_child; + parent_to_child.reserve(index_to_key.size()); for (size_t i = 0; i < parent_keys_container.size(); ++i) { @@ -318,8 +312,28 @@ ColumnPtr HashedArrayDictionary::getDescendants( parent_to_child[parent_key].emplace_back(child_key); } + return std::make_shared(std::move(parent_to_child)); + } + else + { + return nullptr; + } +} + +template +ColumnPtr HashedArrayDictionary::getDescendants( + ColumnPtr key_column [[maybe_unused]], + const DataTypePtr &, + size_t level [[maybe_unused]], + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup; + const auto & keys = getColumnVectorData(this, key_column, keys_backup); + size_t keys_found = 0; - auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found); + auto result = getKeysDescendantsArray(keys, *parent_to_child_index, level, keys_found); query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index a649fddcc39..17bd5c17102 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -109,10 +109,13 @@ public: ColumnPtr in_key_column, const DataTypePtr & key_type) const override; + DictionaryHierarchyParentToChildIndexPtr 
getHierarchyParentToChildIndex() const override; + ColumnPtr getDescendants( ColumnPtr key_column, const DataTypePtr & key_type, - size_t level) const override; + size_t level, + DictionaryHierarchyParentToChildIndexPtr) const override; Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 7025922da12..3392ca048db 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -318,18 +318,11 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( } template -ColumnPtr HashedDictionary::getDescendants( - ColumnPtr key_column [[maybe_unused]], - const DataTypePtr &, - size_t level [[maybe_unused]]) const +DictionaryHierarchyParentToChildIndexPtr HashedDictionary::getHierarchyParentToChildIndex() const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - PaddedPODArray keys_backup; - const auto & keys = getColumnVectorData(this, key_column, keys_backup); - size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; - const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const CollectionType & parent_keys = std::get>(hierarchical_attribute.container); @@ -338,8 +331,28 @@ ColumnPtr HashedDictionary::getDescendants( for (const auto & [key, value] : parent_keys) parent_to_child[value].emplace_back(key); + return std::make_shared(std::move(parent_to_child)); + } + else + { + return nullptr; + } +} + +template +ColumnPtr HashedDictionary::getDescendants( + ColumnPtr key_column [[maybe_unused]], + const DataTypePtr &, + size_t level [[maybe_unused]], + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup; + const auto & keys = getColumnVectorData(this, key_column, keys_backup); + size_t keys_found; - auto 
result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found); + auto result = getKeysDescendantsArray(keys, *parent_to_child_index, level, keys_found); query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); @@ -347,7 +360,9 @@ ColumnPtr HashedDictionary::getDescendants( return result; } else + { return nullptr; + } } template diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 1ef1c58b67c..433bb7eef67 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -110,10 +110,13 @@ public: ColumnPtr in_key_column, const DataTypePtr & key_type) const override; + DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const override; + ColumnPtr getDescendants( ColumnPtr key_column, const DataTypePtr & key_type, - size_t level) const override; + size_t level, + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index) const override; Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index dd729fe636c..03a52263c91 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -8,6 +8,22 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } +namespace detail +{ + ColumnPtr convertElementsAndOffsetsIntoArray(ElementsAndOffsets && elements_and_offsets) + { + auto elements_column = ColumnVector::create(); + elements_column->getData() = std::move(elements_and_offsets.elements); + + auto offsets_column = ColumnVector::create(); + offsets_column->getData() = std::move(elements_and_offsets.offsets); + + auto column_array = ColumnArray::create(std::move(elements_column), std::move(offsets_column)); + + return column_array; + } +} + namespace { /** In case of cache or direct 
dictionary we does not have structure with child to parent representation. diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index f0c6b4b1a0f..9f40e54d41d 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -14,25 +14,54 @@ namespace DB { +class DictionaryHierarchyParentToChildIndex; +using DictionaryHierarchyParentToChildIndexPtr = std::shared_ptr; + +class DictionaryHierarchyParentToChildIndex +{ +public: + struct KeysRange + { + UInt32 start_index; + UInt32 end_index; + }; + + explicit DictionaryHierarchyParentToChildIndex(const HashMap> & parent_to_children_map_) + { + parent_to_children_map.reserve(parent_to_children_map_.size()); + for (const auto & [parent, children] : parent_to_children_map_) + { + auto & parent_to_children_array = parent_to_children_map[parent]; + parent_to_children_array.assign(children); + } + } + + explicit DictionaryHierarchyParentToChildIndex(HashMap> && parent_to_children_map_) + : parent_to_children_map(std::move(parent_to_children_map_)) + { + } + + /// Parent to children map + HashMap> parent_to_children_map; +}; + namespace detail { - template struct ElementsAndOffsets { - PaddedPODArray elements; + PaddedPODArray elements; PaddedPODArray offsets; }; - template struct IsKeyValidFuncInterface { - bool operator()(T key [[maybe_unused]]) { return false; } + bool operator()(UInt64 key [[maybe_unused]]) { return false; } }; template struct GetParentKeyFuncInterface { - std::optional operator()(T key [[maybe_unused]]) { return {}; } + std::optional operator()(UInt64 key [[maybe_unused]]) { return {}; } }; /** Calculate hierarchy for keys iterating the hierarchy from child to parent using get_parent_key_func provided by client. 
@@ -54,16 +83,16 @@ namespace detail * Elements: [1, 2, 1, 3, 1, 4, 2, 1] * Offsets: [1, 3, 5, 8, 8] */ - template - ElementsAndOffsets getHierarchy( - const PaddedPODArray & keys, - const KeyType & hierarchy_null_value, + template + ElementsAndOffsets getHierarchy( + const PaddedPODArray & keys, + const UInt64 & hierarchy_null_value, IsKeyValidFunc && is_key_valid_func, GetParentKeyFunc && get_parent_key_func) { size_t hierarchy_keys_size = keys.size(); - PaddedPODArray elements; + PaddedPODArray elements; elements.reserve(hierarchy_keys_size); PaddedPODArray offsets; @@ -75,7 +104,7 @@ namespace detail size_t array_element_offset; }; - HashMap already_processes_keys_to_offset; + HashMap already_processes_keys_to_offset; already_processes_keys_to_offset.reserve(hierarchy_keys_size); for (size_t i = 0; i < hierarchy_keys_size; ++i) @@ -123,7 +152,7 @@ namespace detail elements.emplace_back(hierarchy_key); ++current_hierarchy_depth; - std::optional parent_key = std::forward(get_parent_key_func)(hierarchy_key); + std::optional parent_key = std::forward(get_parent_key_func)(hierarchy_key); if (!parent_key.has_value()) break; @@ -134,7 +163,7 @@ namespace detail offsets.emplace_back(elements.size()); } - ElementsAndOffsets result = {std::move(elements), std::move(offsets)}; + ElementsAndOffsets result = {std::move(elements), std::move(offsets)}; return result; } @@ -146,11 +175,11 @@ namespace detail * * Not: keys size must be equal to in_keys_size. 
*/ - template + template PaddedPODArray getIsInHierarchy( - const PaddedPODArray & keys, - const PaddedPODArray & in_keys, - const KeyType & hierarchy_null_value, + const PaddedPODArray & keys, + const PaddedPODArray & in_keys, + const UInt64 & hierarchy_null_value, IsKeyValidFunc && is_key_valid_func, GetParentKeyFunc && get_parent_func) { @@ -159,7 +188,7 @@ namespace detail PaddedPODArray result; result.resize_fill(keys.size()); - detail::ElementsAndOffsets hierarchy = detail::getHierarchy( + detail::ElementsAndOffsets hierarchy = detail::getHierarchy( keys, hierarchy_null_value, std::forward(is_key_valid_func), @@ -216,19 +245,21 @@ namespace detail * Result: [1], [2, 3], [4], [], []; * Offsets: [1, 3, 4, 4, 4]; */ - template - ElementsAndOffsets getDescendants( - const PaddedPODArray & keys, - const HashMap> & parent_to_child, + template + ElementsAndOffsets getDescendants( + const PaddedPODArray & keys, + const DictionaryHierarchyParentToChildIndex & parent_to_child_index, Strategy strategy, size_t & valid_keys) { + auto & parent_to_child = parent_to_child_index.parent_to_children_map; + /// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants. /// If strategy is GetDescendantsAtSpecificLevelStrategy we does not use cache strategy. 
size_t keys_size = keys.size(); valid_keys = 0; - PaddedPODArray descendants; + PaddedPODArray descendants; descendants.reserve(keys_size); PaddedPODArray descendants_offsets; @@ -241,18 +272,18 @@ namespace detail }; static constexpr Int64 key_range_requires_update = -1; - HashMap already_processed_keys_to_range [[maybe_unused]]; + HashMap already_processed_keys_to_range [[maybe_unused]]; if constexpr (std::is_same_v) already_processed_keys_to_range.reserve(keys_size); struct KeyAndDepth { - KeyType key; + UInt64 key; Int64 depth; }; - HashSet already_processed_keys_during_loop; + HashSet already_processed_keys_during_loop; already_processed_keys_during_loop.reserve(keys_size); PaddedPODArray next_keys_to_process_stack; @@ -262,7 +293,7 @@ namespace detail for (size_t i = 0; i < keys_size; ++i) { - const KeyType & requested_key = keys[i]; + const UInt64 & requested_key = keys[i]; if (parent_to_child.find(requested_key) == nullptr) { @@ -282,7 +313,7 @@ namespace detail { KeyAndDepth key_to_process = next_keys_to_process_stack.back(); - KeyType key = key_to_process.key; + UInt64 key = key_to_process.key; Int64 depth = key_to_process.depth; next_keys_to_process_stack.pop_back(); @@ -370,24 +401,12 @@ namespace detail descendants_offsets.emplace_back(descendants.size()); } - ElementsAndOffsets result = {std::move(descendants), std::move(descendants_offsets)}; + ElementsAndOffsets result = {std::move(descendants), std::move(descendants_offsets)}; return result; } /// Converts ElementAndOffsets structure into ArrayColumn - template - ColumnPtr convertElementsAndOffsetsIntoArray(ElementsAndOffsets && elements_and_offsets) - { - auto elements_column = ColumnVector::create(); - elements_column->getData() = std::move(elements_and_offsets.elements); - - auto offsets_column = ColumnVector::create(); - offsets_column->getData() = std::move(elements_and_offsets.offsets); - - auto column_array = ColumnArray::create(std::move(elements_column), std::move(offsets_column)); - - 
return column_array; - } + ColumnPtr convertElementsAndOffsetsIntoArray(ElementsAndOffsets && elements_and_offsets); } /// Returns hierarchy array column for keys @@ -435,20 +454,20 @@ ColumnUInt8::Ptr getKeysIsInHierarchyColumn( template ColumnPtr getKeysDescendantsArray( const PaddedPODArray & requested_keys, - const HashMap> & parent_to_child, + const DictionaryHierarchyParentToChildIndex & parent_to_child_index, size_t level, size_t & valid_keys) { if (level == 0) { detail::GetAllDescendantsStrategy strategy { .level = level }; - auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys); + auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child_index, strategy, valid_keys); return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); } else { detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level }; - auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys); + auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child_index, strategy, valid_keys); return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); } } diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index c18dbcfbea7..d1e1bff1231 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -24,6 +24,9 @@ namespace ErrorCodes class IDictionary; using DictionaryPtr = std::unique_ptr; +class DictionaryHierarchyParentToChildIndex; +using DictionaryHierarchyParentToChildIndexPtr = std::shared_ptr; + /** DictionaryKeyType provides IDictionary client information about * which key type is supported by dictionary. 
* @@ -228,10 +231,18 @@ public: getDictionaryID().getNameForLogs()); } + virtual DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Method getParentToChildIndex is not supported for {} dictionary.", + getDictionaryID().getNameForLogs()); + } + virtual ColumnPtr getDescendants( ColumnPtr key_column [[maybe_unused]], const DataTypePtr & key_type [[maybe_unused]], - size_t level [[maybe_unused]]) const + size_t level [[maybe_unused]], + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDescendants is not supported for {} dictionary.", diff --git a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp index f4854b7c77e..8d0700af8cf 100644 --- a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp +++ b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp @@ -151,13 +151,15 @@ TEST(HierarchyDictionariesUtils, getDescendants) parent_to_child[1].emplace_back(3); parent_to_child[2].emplace_back(4); + auto parent_to_child_index = std::make_shared(parent_to_child); + PaddedPODArray keys = {0, 1, 2, 3, 4}; { size_t keys_found; auto result = DB::detail::getDescendants( keys, - parent_to_child, + *parent_to_child_index, DB::detail::GetAllDescendantsStrategy(), keys_found); @@ -175,7 +177,7 @@ TEST(HierarchyDictionariesUtils, getDescendants) size_t keys_found; auto result = DB::detail::getDescendants( keys, - parent_to_child, + *parent_to_child_index, DB::detail::GetDescendantsAtSpecificLevelStrategy{1}, keys_found); @@ -195,13 +197,15 @@ TEST(HierarchyDictionariesUtils, getDescendants) parent_to_child[1].emplace_back(2); parent_to_child[2].emplace_back(1); + auto parent_to_child_index = std::make_shared(parent_to_child); + PaddedPODArray keys = {1, 2, 3}; { size_t keys_found; auto result = 
DB::detail::getDescendants( keys, - parent_to_child, + *parent_to_child_index, DB::detail::GetAllDescendantsStrategy(), keys_found); @@ -219,7 +223,7 @@ TEST(HierarchyDictionariesUtils, getDescendants) size_t keys_found; auto result = DB::detail::getDescendants( keys, - parent_to_child, + *parent_to_child_index, DB::detail::GetDescendantsAtSpecificLevelStrategy{1}, keys_found); diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp index 6b83f761086..0cd56f87df4 100644 --- a/src/Functions/FunctionsExternalDictionaries.cpp +++ b/src/Functions/FunctionsExternalDictionaries.cpp @@ -24,8 +24,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 189ec7321c1..4c1358f435c 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1053,86 +1053,161 @@ private: mutable FunctionDictHelper helper; }; -class FunctionDictGetChildren final : public IFunction +class FunctionDictGetDescendantsExecutable final : public IExecutableFunction { public: - static constexpr auto name = "dictGetChildren"; - - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context); - } - - explicit FunctionDictGetChildren(ContextPtr context_) - : helper(context_) {} + FunctionDictGetDescendantsExecutable( + String name_, + size_t level_, + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index_, + std::shared_ptr dictionary_helper_) + : name(name_) + , level(level_) + , parent_to_child_index(std::move(parent_to_child_index_)) + , 
dictionary_helper(std::move(dictionary_helper_)) + {} String getName() const override { return name; } -private: - size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForConstants() const final { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; } - bool isDeterministic() const override { return false; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (!isString(arguments[0].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function {}. Expected String. Actual type {}", - getName(), - arguments[0].type->getName()); - - auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - - return std::make_shared(hierarchical_attribute.type); - } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 2}; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); - auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + auto dictionary = dictionary_helper->getDictionary(arguments[0].column); + const auto & hierarchical_attribute = dictionary_helper->getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - ColumnPtr result = 
dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, 1); - + ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level, parent_to_child_index); return result; } - mutable FunctionDictHelper helper; + String name; + size_t level; + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index; + std::shared_ptr dictionary_helper; }; -class FunctionDictGetDescendants final : public IFunction +class FunctionDictGetDescendantsBase final : public IFunctionBase { public: - static constexpr auto name = "dictGetDescendants"; - - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context); - } - - explicit FunctionDictGetDescendants(ContextPtr context_) - : helper(context_) {} + FunctionDictGetDescendantsBase( + String name_, + const DataTypes & argument_types_, + const DataTypePtr & result_type_, + size_t level_, + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index, + std::shared_ptr helper_) + : name(name_) + , argument_types(argument_types_) + , result_type(result_type_) + , level(level_) + , parent_to_child_index(std::move(parent_to_child_index)) + , helper(std::move(helper_)) + {} String getName() const override { return name; } -private: - size_t getNumberOfArguments() const override { return 0; } - bool isVariadic() const override { return true; } + const DataTypes & getArgumentTypes() const override { return argument_types; } + + const DataTypePtr & getResultType() const override { return result_type; } - bool useDefaultImplementationForConstants() const final { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 2}; } - bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_shared(name, level, 
parent_to_child_index, helper); + } + + String name; + DataTypes argument_types; + DataTypePtr result_type; + size_t level; + DictionaryHierarchyParentToChildIndexPtr parent_to_child_index; + std::shared_ptr helper; +}; + +struct FunctionDictGetDescendantsStrategy +{ + static constexpr auto name = "dictGetDescendants"; + static constexpr size_t default_level = 0; + static constexpr size_t number_of_arguments = 0; + static constexpr bool is_variadic = true; +}; + +struct FunctionDictGetChildrenStrategy +{ + static constexpr auto name = "dictGetChildren"; + static constexpr size_t default_level = 1; + static constexpr size_t number_of_arguments = 2; + static constexpr bool is_variadic = false; +}; + +template +class FunctionDictGetDescendantsOverloadResolverImpl final : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = Strategy::name; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return Strategy::number_of_arguments; } + + bool isVariadic() const override { return Strategy::is_variadic; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 2}; } + + bool isDeterministic() const override { return false; } + + explicit FunctionDictGetDescendantsOverloadResolverImpl(ContextPtr context) + : dictionary_helper(std::make_shared(std::move(context))) + {} + + static FunctionOverloadResolverPtr create(ContextPtr context) + { + return std::make_shared(std::move(context)); + } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + std::cout << "FunctionDictGetDescendantsOverloadResolver::buildImpl" << std::endl; + + auto dictionary = dictionary_helper->getDictionary(arguments[0].column); + auto hierarchy_parent_to_child_index = dictionary->getHierarchyParentToChildIndex(); + + size_t level = Strategy::default_level; + + if (arguments.size() == 3) + { + if (!isColumnConst(*arguments[2].column)) + 
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of third argument of function {}. Expected const unsigned integer.", + getName()); + + auto value = static_cast(arguments[2].column->getInt(0)); + if (value < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of third argument of function {}. Expected const unsigned integer.", + getName()); + + level = static_cast(value); + } + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + + for (auto & argument : arguments) + argument_types.emplace_back(argument.type); + + return std::make_shared(name, argument_types, result_type, level, hierarchy_parent_to_child_index, dictionary_helper); + } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { size_t arguments_size = arguments.size(); @@ -1158,47 +1233,16 @@ private: arguments[2].type->getName()); } - auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + auto dictionary = dictionary_helper->getDictionary(arguments[0].column); + const auto & hierarchical_attribute = dictionary_helper->getDictionaryHierarchicalAttribute(dictionary); return std::make_shared(hierarchical_attribute.type); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - if (input_rows_count == 0) - return result_type->createColumn(); - - auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - - size_t level = 0; - - if (arguments.size() == 3) - { - if (!isColumnConst(*arguments[2].column)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function {}. 
Expected const unsigned integer.", - getName()); - - auto value = static_cast(arguments[2].column->getInt(0)); - if (value < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function {}. Expected const unsigned integer.", - getName()); - - level = static_cast(value); - } - - auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; - auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - - ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level); - - return result; - } - - mutable FunctionDictHelper helper; + std::shared_ptr dictionary_helper; }; +using FunctionDictGetDescendantsOverloadResolver = FunctionDictGetDescendantsOverloadResolverImpl; +using FunctionDictGetChildrenOverloadResolver = FunctionDictGetDescendantsOverloadResolverImpl; + } From 1142e05683929972872b5b1f6abb2e1be0fd01a9 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 12 May 2022 13:20:27 +0200 Subject: [PATCH 407/615] Dictionaries allow to specify bidirectional for hierarhical attribute --- src/Dictionaries/DictionaryStructure.cpp | 7 +++++- src/Dictionaries/DictionaryStructure.h | 1 + src/Dictionaries/FlatDictionary.cpp | 16 +++++++++++++ src/Dictionaries/FlatDictionary.h | 6 +++++ src/Dictionaries/HashedArrayDictionary.cpp | 24 +++++++++++++++++-- src/Dictionaries/HashedArrayDictionary.h | 3 +++ src/Dictionaries/HashedDictionary.cpp | 21 ++++++++++++++-- src/Dictionaries/HashedDictionary.h | 3 +++ .../getDictionaryConfigurationFromAST.cpp | 8 +++++++ .../ASTDictionaryAttributeDeclaration.cpp | 3 +++ .../ASTDictionaryAttributeDeclaration.h | 2 ++ .../ParserDictionaryAttributeDeclaration.cpp | 9 +++++++ 12 files changed, 98 insertions(+), 5 deletions(-) diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 012750bde60..5624f9595d7 100644 --- 
a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -252,7 +252,7 @@ Strings DictionaryStructure::getKeysNames() const static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys) { static const std::unordered_set valid_keys - = {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"}; + = {"name", "type", "expression", "null_value", "hierarchical", "bidirectional", "injective", "is_object_id"}; for (const auto & key : keys) { @@ -350,6 +350,7 @@ std::vector DictionaryStructure::getAttributes( } const auto hierarchical = config.getBool(prefix + "hierarchical", false); + const auto bidirectional = config.getBool(prefix + "bidirectional", false); const auto injective = config.getBool(prefix + "injective", false); const auto is_object_id = config.getBool(prefix + "is_object_id", false); @@ -362,6 +363,9 @@ std::vector DictionaryStructure::getAttributes( if (has_hierarchy && hierarchical) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one hierarchical attribute supported"); + if (bidirectional && !hierarchical) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bidirectional can only be applied to hierarchical attributes"); + has_hierarchy = has_hierarchy || hierarchical; res_attributes.emplace_back(DictionaryAttribute{ @@ -372,6 +376,7 @@ std::vector DictionaryStructure::getAttributes( expression, null_value, hierarchical, + bidirectional, injective, is_object_id, is_nullable}); diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 50cfba01894..bb4c306affa 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -67,6 +67,7 @@ struct DictionaryAttribute final const std::string expression; const Field null_value; const bool hierarchical; + const bool bidirectional; const bool injective; const bool is_object_id; const bool is_nullable; diff --git a/src/Dictionaries/FlatDictionary.cpp 
b/src/Dictionaries/FlatDictionary.cpp index ff13cbcdff9..6f7b210d212 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -43,6 +43,7 @@ FlatDictionary::FlatDictionary( { createAttributes(); loadData(); + buildHierarchyParentToChildIndexIfNeeded(); calculateBytesAllocated(); } @@ -246,6 +247,9 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchyParentToChildIndex() const { + if (hierarchy_parent_to_child_index) + return hierarchy_parent_to_child_index; + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -406,6 +410,15 @@ void FlatDictionary::loadData() throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } +void FlatDictionary::buildHierarchyParentToChildIndexIfNeeded() +{ + if (!dict_struct.hierarchical_attribute_index) + return; + + if (dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional) + hierarchy_parent_to_child_index = getHierarchyParentToChildIndex(); +} + void FlatDictionary::calculateBytesAllocated() { bytes_allocated += attributes.size() * sizeof(attributes.front()); @@ -445,6 +458,9 @@ void FlatDictionary::calculateBytesAllocated() if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); + if (hierarchy_parent_to_child_index) + bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); + bytes_allocated += string_arena.size(); } diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index dc97ec2a79e..254c7c85375 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -140,10 +140,15 @@ private: }; void createAttributes(); + void 
blockToAttributes(const Block & block); + void updateData(); + void loadData(); + void buildHierarchyParentToChildIndexIfNeeded(); + void calculateBytesAllocated(); Attribute createAttribute(const DictionaryAttribute & attribute); @@ -175,6 +180,7 @@ private: BlockPtr update_field_loaded_block; Arena string_arena; + DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index; }; } diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 3fc06ba5960..7d25e99f9ad 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -37,6 +37,7 @@ HashedArrayDictionary::HashedArrayDictionary( { createAttributes(); loadData(); + buildHierarchyParentToChildIndexIfNeeded(); calculateBytesAllocated(); } @@ -286,6 +287,9 @@ DictionaryHierarchyParentToChildIndexPtr HashedArrayDictionary & parent_keys_container = std::get>(hierarchical_attribute.container); @@ -707,6 +711,16 @@ void HashedArrayDictionary::loadData() getFullName()); } +template +void HashedArrayDictionary::buildHierarchyParentToChildIndexIfNeeded() +{ + if (!dict_struct.hierarchical_attribute_index) + return; + + if (dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional) + hierarchy_parent_to_child_index = getHierarchyParentToChildIndex(); +} + template void HashedArrayDictionary::calculateBytesAllocated() { @@ -744,10 +758,16 @@ void HashedArrayDictionary::calculateBytesAllocated() bytes_allocated += (*attribute.is_index_null).size(); } - bytes_allocated += string_arena.size(); - if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); + + if (hierarchy_parent_to_child_index) + { + bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); + std::cout << "Hierarchy index size " << hierarchy_parent_to_child_index->getSizeInBytes() << std::endl; + } + + bytes_allocated += string_arena.size(); } template diff --git 
a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 17bd5c17102..52e63701d61 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -176,6 +176,8 @@ private: void loadData(); + void buildHierarchyParentToChildIndexIfNeeded(); + void calculateBytesAllocated(); template @@ -224,6 +226,7 @@ private: BlockPtr update_field_loaded_block; Arena string_arena; + DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index; }; extern template class HashedArrayDictionary; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 3392ca048db..398664306ae 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -54,6 +54,7 @@ HashedDictionary::HashedDictionary( { createAttributes(); loadData(); + buildHierarchyParentToChildIndexIfNeeded(); calculateBytesAllocated(); } @@ -322,6 +323,9 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary & parent_keys = std::get>(hierarchical_attribute.container); @@ -646,6 +650,16 @@ void HashedDictionary::loadData() getFullName()); } +template +void HashedDictionary::buildHierarchyParentToChildIndexIfNeeded() +{ + if (!dict_struct.hierarchical_attribute_index) + return; + + if (dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional) + hierarchy_parent_to_child_index = getHierarchyParentToChildIndex(); +} + template void HashedDictionary::calculateBytesAllocated() { @@ -699,10 +713,13 @@ void HashedDictionary::calculateBytesAllocated() } } - bytes_allocated += string_arena.size(); - if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); + + if (hierarchy_parent_to_child_index) + bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); + + bytes_allocated += string_arena.size(); } template diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 
433bb7eef67..75b8ed33a43 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -197,6 +197,8 @@ private: void loadData(); + void buildHierarchyParentToChildIndexIfNeeded(); + void calculateBytesAllocated(); template @@ -229,6 +231,7 @@ private: BlockPtr update_field_loaded_block; Arena string_arena; NoAttributesCollectionType no_attributes_container; + DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index; }; extern template class HashedDictionary; diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 28a842ccd8c..edc3c34fe81 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -290,6 +290,14 @@ void buildSingleAttribute( attribute_element->appendChild(hierarchical_element); } + if (dict_attr->bidirectional) + { + AutoPtr bidirectional_element(doc->createElement("bidirectional")); + AutoPtr bidirectional(doc->createTextNode("true")); + bidirectional_element->appendChild(bidirectional); + attribute_element->appendChild(bidirectional_element); + } + if (dict_attr->injective) { AutoPtr injective_element(doc->createElement("injective")); diff --git a/src/Parsers/ASTDictionaryAttributeDeclaration.cpp b/src/Parsers/ASTDictionaryAttributeDeclaration.cpp index e9c50839a98..760b96b0927 100644 --- a/src/Parsers/ASTDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ASTDictionaryAttributeDeclaration.cpp @@ -58,6 +58,9 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin if (hierarchical) settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "HIERARCHICAL"; + if (bidirectional) + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "BIDIRECTIONAL"; + if (injective) settings.ostr << ' ' << (settings.hilite ? 
hilite_keyword : "") << "INJECTIVE"; diff --git a/src/Parsers/ASTDictionaryAttributeDeclaration.h b/src/Parsers/ASTDictionaryAttributeDeclaration.h index b34ebc539ec..b6572e89d16 100644 --- a/src/Parsers/ASTDictionaryAttributeDeclaration.h +++ b/src/Parsers/ASTDictionaryAttributeDeclaration.h @@ -20,6 +20,8 @@ public: ASTPtr expression; /// Is attribute mirrored to the parent identifier bool hierarchical; + /// Is hierarchical attribute bidirectional + bool bidirectional; /// Flag that shows whether the id->attribute image is injective bool injective; /// MongoDB object ID diff --git a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp index a248c9def07..44bb7fb6057 100644 --- a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp @@ -15,6 +15,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_default{"DEFAULT"}; ParserKeyword s_expression{"EXPRESSION"}; ParserKeyword s_hierarchical{"HIERARCHICAL"}; + ParserKeyword s_bidirectional{"BIDIRECTIONAL"}; ParserKeyword s_injective{"INJECTIVE"}; ParserKeyword s_is_object_id{"IS_OBJECT_ID"}; ParserLiteral default_parser; @@ -30,6 +31,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr default_value; ASTPtr expression; bool hierarchical = false; + bool bidirectional = false; bool injective = false; bool is_object_id = false; @@ -63,6 +65,12 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E continue; } + if (!bidirectional && s_bidirectional.ignore(pos, expected)) + { + bidirectional = true; + continue; + } + if (!injective && s_injective.ignore(pos, expected)) { injective = true; @@ -101,6 +109,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E } attribute_declaration->hierarchical = hierarchical; + attribute_declaration->bidirectional = bidirectional; 
attribute_declaration->injective = injective; attribute_declaration->is_object_id = is_object_id; From 25d6bd1f34be6da25f032d49484ba761d5b72c8c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 12 May 2022 13:39:54 +0200 Subject: [PATCH 408/615] Dictionaries optimize hierarchical index structure --- src/Dictionaries/FlatDictionary.cpp | 2 +- src/Dictionaries/HashedArrayDictionary.cpp | 2 +- src/Dictionaries/HashedDictionary.cpp | 2 +- src/Dictionaries/HierarchyDictionariesUtils.h | 56 ++++++++++++++----- 4 files changed, 44 insertions(+), 18 deletions(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 6f7b210d212..60959deee95 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -264,7 +264,7 @@ DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchyParentToChi parent_to_child[parent_key].emplace_back(static_cast(i)); } - return std::make_shared(std::move(parent_to_child)); + return std::make_shared(parent_to_child); } ColumnPtr FlatDictionary::getDescendants( diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 7d25e99f9ad..92e2776ecbb 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -316,7 +316,7 @@ DictionaryHierarchyParentToChildIndexPtr HashedArrayDictionary(std::move(parent_to_child)); + return std::make_shared(parent_to_child); } else { diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 398664306ae..ccfbf114fa4 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -335,7 +335,7 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary(std::move(parent_to_child)); + return std::make_shared(parent_to_child); } else { diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index 9f40e54d41d..9af6f9e01d9 
100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -28,21 +28,34 @@ public: explicit DictionaryHierarchyParentToChildIndex(const HashMap> & parent_to_children_map_) { - parent_to_children_map.reserve(parent_to_children_map_.size()); - for (const auto & [parent, children] : parent_to_children_map_) + size_t parent_to_children_map_size = parent_to_children_map_.size(); + + keys.reserve(parent_to_children_map_size); + parent_to_children_keys_range.reserve(parent_to_children_map_size); + + for (auto & [parent, children] : parent_to_children_map_) { - auto & parent_to_children_array = parent_to_children_map[parent]; - parent_to_children_array.assign(children); + UInt32 start_index = static_cast(keys.size()); + for (auto child : children) + { + keys.push_back(child); + } + + UInt32 end_index = static_cast(keys.size()); + parent_to_children_keys_range[parent] = DictionaryHierarchyParentToChildIndex::KeysRange{start_index, end_index}; } } - explicit DictionaryHierarchyParentToChildIndex(HashMap> && parent_to_children_map_) - : parent_to_children_map(std::move(parent_to_children_map_)) + size_t getSizeInBytes() const { + return parent_to_children_keys_range.getBufferSizeInBytes() + (keys.size() * sizeof(UInt64)); } - /// Parent to children map - HashMap> parent_to_children_map; + /// Map parent key to range of children from keys array + HashMap parent_to_children_keys_range; + + /// Array of keys in hierarchy + PaddedPODArray keys; }; namespace detail @@ -252,7 +265,8 @@ namespace detail Strategy strategy, size_t & valid_keys) { - auto & parent_to_child = parent_to_child_index.parent_to_children_map; + auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range; + auto & children_keys = parent_to_child_index.keys; /// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants. 
/// If strategy is GetDescendantsAtSpecificLevelStrategy we does not use cache strategy. @@ -295,7 +309,7 @@ namespace detail { const UInt64 & requested_key = keys[i]; - if (parent_to_child.find(requested_key) == nullptr) + if (parent_to_children_keys_range.find(requested_key) == nullptr) { descendants_offsets.emplace_back(descendants.size()); continue; @@ -360,7 +374,7 @@ namespace detail } } - const auto * it = parent_to_child.find(key); + const auto * it = parent_to_children_keys_range.find(key); if (!it || depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH) continue; @@ -375,7 +389,8 @@ namespace detail { /// Put special signaling value on stack and update cache with range start size_t range_start_index = descendants.size(); - already_processed_keys_to_range[key].start_index = range_start_index; + Range range {range_start_index, range_start_index}; + already_processed_keys_to_range.insert(makePairNoInit(key, range)); next_keys_to_process_stack.emplace_back(KeyAndDepth{key, key_range_requires_update}); } @@ -383,15 +398,26 @@ namespace detail ++depth; - const auto & children = it->getMapped(); + DictionaryHierarchyParentToChildIndex::KeysRange children_range = it->getMapped(); - for (auto child_key : children) + for (; children_range.start_index < children_range.end_index; ++children_range.start_index) { + auto child_key = children_keys[children_range.start_index]; + /// In case of GetAllDescendantsStrategy we add any descendant to result array /// If strategy is GetDescendantsAtSpecificLevelStrategy we require depth == level - if (std::is_same_v || depth == level) + if constexpr (std::is_same_v) descendants.emplace_back(child_key); + if constexpr (std::is_same_v) + { + if (depth == level) + { + descendants.emplace_back(child_key); + continue; + } + } + next_keys_to_process_stack.emplace_back(KeyAndDepth{child_key, depth}); } } From be9c3d9bd46822139c06d917fcbb9772a7a9628c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 12 May 2022 14:26:01 +0200 Subject: 
[PATCH 409/615] Fixed build --- src/Functions/FunctionsExternalDictionaries.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 4c1358f435c..231b48e1dfb 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1176,8 +1176,6 @@ public: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { - std::cout << "FunctionDictGetDescendantsOverloadResolver::buildImpl" << std::endl; - auto dictionary = dictionary_helper->getDictionary(arguments[0].column); auto hierarchy_parent_to_child_index = dictionary->getHierarchyParentToChildIndex(); From 7e4c950bd9b51b55f0812b3b52aa63a1dc55dfbf Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 12 May 2022 14:45:06 +0200 Subject: [PATCH 410/615] Fixed style check --- src/Dictionaries/HashedArrayDictionary.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 92e2776ecbb..faec7d12791 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -761,11 +761,8 @@ void HashedArrayDictionary::calculateBytesAllocated() if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); - if (hierarchy_parent_to_child_index) - { + if (hierarchy_parent_to_child_index) bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); - std::cout << "Hierarchy index size " << hierarchy_parent_to_child_index->getSizeInBytes() << std::endl; - } bytes_allocated += string_arena.size(); } From f76e3801deeb39f360d706defa6a1f25b7243b8b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 12 May 2022 18:18:51 +0200 Subject: [PATCH 411/615] Fixed tests --- .../HierarchyDictionariesUtils.cpp | 20 +++++++++++++++++ 
src/Dictionaries/HierarchyDictionariesUtils.h | 22 +++---------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index 03a52263c91..cf93c1c4a64 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -100,6 +100,26 @@ namespace } } +ColumnPtr getKeysDescendantsArray( + const PaddedPODArray & requested_keys, + const DictionaryHierarchyParentToChildIndex & parent_to_child_index, + size_t level, + size_t & valid_keys) +{ + if (level == 0) + { + detail::GetAllDescendantsStrategy strategy { .level = level }; + auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child_index, strategy, valid_keys); + return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); + } + else + { + detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level }; + auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child_index, strategy, valid_keys); + return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); + } +} + ColumnPtr getKeysHierarchyDefaultImplementation( const IDictionary * dictionary, ColumnPtr key_column, diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index 9af6f9e01d9..b4087fb2f29 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -389,8 +389,7 @@ namespace detail { /// Put special signaling value on stack and update cache with range start size_t range_start_index = descendants.size(); - Range range {range_start_index, range_start_index}; - already_processed_keys_to_range.insert(makePairNoInit(key, range)); + already_processed_keys_to_range[key].start_index = range_start_index; next_keys_to_process_stack.emplace_back(KeyAndDepth{key, key_range_requires_update}); } @@ 
-477,26 +476,11 @@ ColumnUInt8::Ptr getKeysIsInHierarchyColumn( /// Returns descendants array column for keys /// /// @param valid_keys - number of keys that are valid in parent_to_child map -template ColumnPtr getKeysDescendantsArray( - const PaddedPODArray & requested_keys, + const PaddedPODArray & requested_keys, const DictionaryHierarchyParentToChildIndex & parent_to_child_index, size_t level, - size_t & valid_keys) -{ - if (level == 0) - { - detail::GetAllDescendantsStrategy strategy { .level = level }; - auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child_index, strategy, valid_keys); - return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); - } - else - { - detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level }; - auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child_index, strategy, valid_keys); - return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); - } -} + size_t & valid_keys); /** Default getHierarchy implementation for dictionaries that does not have structure with child to parent representation. * Implementation will build such structure with getColumn calls, and then getHierarchy for such structure. 
From 585b86446e164ee1196176fba7aa29a9c38ebf50 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 13 May 2022 12:48:47 +0200 Subject: [PATCH 412/615] Added hierarchical_index_bytes_allocated column in system.dictionaries --- src/Dictionaries/FlatDictionary.cpp | 19 +++--- src/Dictionaries/FlatDictionary.h | 9 ++- src/Dictionaries/HashedArrayDictionary.cpp | 19 +++--- src/Dictionaries/HashedArrayDictionary.h | 9 ++- src/Dictionaries/HashedDictionary.cpp | 19 +++--- src/Dictionaries/HashedDictionary.h | 9 ++- .../HierarchyDictionariesUtils.cpp | 2 +- src/Dictionaries/HierarchyDictionariesUtils.h | 28 ++++---- src/Dictionaries/IDictionary.h | 15 +++-- .../gtest_hierarchy_dictionaries_utils.cpp | 4 +- src/Functions/FunctionsExternalDictionaries.h | 20 +++--- .../System/StorageSystemDictionaries.cpp | 4 +- ..._dictionaries_hierarchical_index.reference | 6 ++ .../02294_dictionaries_hierarchical_index.sql | 66 +++++++++++++++++++ 14 files changed, 162 insertions(+), 67 deletions(-) create mode 100644 tests/queries/0_stateless/02294_dictionaries_hierarchical_index.reference create mode 100644 tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 60959deee95..72fa5cfb094 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -245,10 +245,10 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( return result; } -DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchyParentToChildIndex() const +DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchicalIndex() const { - if (hierarchy_parent_to_child_index) - return hierarchy_parent_to_child_index; + if (hierarhical_index) + return hierarhical_index; size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; @@ -264,14 +264,14 @@ 
DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchyParentToChi parent_to_child[parent_key].emplace_back(static_cast(i)); } - return std::make_shared(parent_to_child); + return std::make_shared(parent_to_child); } ColumnPtr FlatDictionary::getDescendants( ColumnPtr key_column, const DataTypePtr &, size_t level, - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index) const + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index) const { PaddedPODArray keys_backup; const auto & keys = getColumnVectorData(this, key_column, keys_backup); @@ -416,7 +416,7 @@ void FlatDictionary::buildHierarchyParentToChildIndexIfNeeded() return; if (dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional) - hierarchy_parent_to_child_index = getHierarchyParentToChildIndex(); + hierarhical_index = getHierarchicalIndex(); } void FlatDictionary::calculateBytesAllocated() @@ -458,8 +458,11 @@ void FlatDictionary::calculateBytesAllocated() if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); - if (hierarchy_parent_to_child_index) - bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); + if (hierarhical_index) + { + hierarchical_index_bytes_allocated = hierarhical_index->getSizeInBytes(); + bytes_allocated += hierarchical_index_bytes_allocated; + } bytes_allocated += string_arena.size(); } diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 254c7c85375..9ae3e9a2e71 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -92,13 +92,15 @@ public: ColumnPtr in_key_column, const DataTypePtr & key_type) const override; - DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const override; + DictionaryHierarchicalParentToChildIndexPtr getHierarchicalIndex() const override; + + size_t getHierarchicalIndexBytesAllocated() const override { return hierarchical_index_bytes_allocated; } ColumnPtr 
getDescendants( ColumnPtr key_column, const DataTypePtr & key_type, size_t level, - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index) const override; + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index) const override; Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; @@ -173,6 +175,7 @@ private: std::vector loaded_keys; size_t bytes_allocated = 0; + size_t hierarchical_index_bytes_allocated = 0; size_t element_count = 0; size_t bucket_count = 0; mutable std::atomic query_count{0}; @@ -180,7 +183,7 @@ private: BlockPtr update_field_loaded_block; Arena string_arena; - DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index; + DictionaryHierarchicalParentToChildIndexPtr hierarhical_index; }; } diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index faec7d12791..fbf6e01f288 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -283,12 +283,12 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( } template -DictionaryHierarchyParentToChildIndexPtr HashedArrayDictionary::getHierarchyParentToChildIndex() const +DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary::getHierarchicalIndex() const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - if (hierarchy_parent_to_child_index) - return hierarchy_parent_to_child_index; + if (hierarchical_index) + return hierarchical_index; size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; @@ -316,7 +316,7 @@ DictionaryHierarchyParentToChildIndexPtr HashedArrayDictionary(parent_to_child); + return std::make_shared(parent_to_child); } else { @@ -329,7 +329,7 @@ ColumnPtr HashedArrayDictionary::getDescendants( ColumnPtr key_column [[maybe_unused]], const DataTypePtr &, size_t level [[maybe_unused]], 
- DictionaryHierarchyParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { @@ -718,7 +718,7 @@ void HashedArrayDictionary::buildHierarchyParentToChildInde return; if (dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional) - hierarchy_parent_to_child_index = getHierarchyParentToChildIndex(); + hierarchical_index = getHierarchicalIndex(); } template @@ -761,8 +761,11 @@ void HashedArrayDictionary::calculateBytesAllocated() if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); - if (hierarchy_parent_to_child_index) - bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); + if (hierarchical_index) + { + hierarchical_index_bytes_allocated = hierarchical_index->getSizeInBytes(); + bytes_allocated += hierarchical_index_bytes_allocated; + } bytes_allocated += string_arena.size(); } diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 52e63701d61..8df9ce4c1f7 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -109,13 +109,15 @@ public: ColumnPtr in_key_column, const DataTypePtr & key_type) const override; - DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const override; + DictionaryHierarchicalParentToChildIndexPtr getHierarchicalIndex() const override; + + size_t getHierarchicalIndexBytesAllocated() const override { return hierarchical_index_bytes_allocated; } ColumnPtr getDescendants( ColumnPtr key_column, const DataTypePtr & key_type, size_t level, - DictionaryHierarchyParentToChildIndexPtr) const override; + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index) const override; Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; @@ 
-219,6 +221,7 @@ private: KeyAttribute key_attribute; size_t bytes_allocated = 0; + size_t hierarchical_index_bytes_allocated = 0; size_t element_count = 0; size_t bucket_count = 0; mutable std::atomic query_count{0}; @@ -226,7 +229,7 @@ private: BlockPtr update_field_loaded_block; Arena string_arena; - DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index; + DictionaryHierarchicalParentToChildIndexPtr hierarchical_index; }; extern template class HashedArrayDictionary; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index ccfbf114fa4..8e42c6f1d54 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -319,12 +319,12 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( } template -DictionaryHierarchyParentToChildIndexPtr HashedDictionary::getHierarchyParentToChildIndex() const +DictionaryHierarchyParentToChildIndexPtr HashedDictionary::getHierarchicalIndex() const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - if (hierarchy_parent_to_child_index) - return hierarchy_parent_to_child_index; + if (hierarchical_index) + return hierarchical_index; size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; @@ -335,7 +335,7 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary(parent_to_child); + return std::make_shared(parent_to_child); } else { @@ -348,7 +348,7 @@ ColumnPtr HashedDictionary::getDescendants( ColumnPtr key_column [[maybe_unused]], const DataTypePtr &, size_t level [[maybe_unused]], - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { @@ -657,7 +657,7 @@ void HashedDictionary::buildHierarchyParentToChildI return; if 
(dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional) - hierarchy_parent_to_child_index = getHierarchyParentToChildIndex(); + hierarchical_index = getHierarchicalIndex(); } template @@ -716,8 +716,11 @@ void HashedDictionary::calculateBytesAllocated() if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); - if (hierarchy_parent_to_child_index) - bytes_allocated += hierarchy_parent_to_child_index->getSizeInBytes(); + if (hierarchical_index) + { + hierarchical_index_bytes_allocated = hierarchical_index->getSizeInBytes(); + bytes_allocated += hierarchical_index_bytes_allocated; + } bytes_allocated += string_arena.size(); } diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 75b8ed33a43..ba5d284466a 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -110,13 +110,15 @@ public: ColumnPtr in_key_column, const DataTypePtr & key_type) const override; - DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const override; + DictionaryHierarchicalParentToChildIndexPtr getHierarchicalIndex() const override; + + size_t getHierarchicalIndexBytesAllocated() const override { return hierarchical_index_bytes_allocated; } ColumnPtr getDescendants( ColumnPtr key_column, const DataTypePtr & key_type, size_t level, - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index) const override; + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index) const override; Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override; @@ -223,6 +225,7 @@ private: std::vector attributes; size_t bytes_allocated = 0; + size_t hierarchical_index_bytes_allocated = 0; size_t element_count = 0; size_t bucket_count = 0; mutable std::atomic query_count{0}; @@ -231,7 +234,7 @@ private: BlockPtr update_field_loaded_block; Arena string_arena; NoAttributesCollectionType 
no_attributes_container; - DictionaryHierarchyParentToChildIndexPtr hierarchy_parent_to_child_index; + DictionaryHierarchicalParentToChildIndexPtr hierarchical_index; }; extern template class HashedDictionary; diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index cf93c1c4a64..0b5f8478f7a 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -102,7 +102,7 @@ namespace ColumnPtr getKeysDescendantsArray( const PaddedPODArray & requested_keys, - const DictionaryHierarchyParentToChildIndex & parent_to_child_index, + const DictionaryHierarchicalParentToChildIndex & parent_to_child_index, size_t level, size_t & valid_keys) { diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index b4087fb2f29..6a59a37b5e7 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -14,10 +14,10 @@ namespace DB { -class DictionaryHierarchyParentToChildIndex; -using DictionaryHierarchyParentToChildIndexPtr = std::shared_ptr; +class DictionaryHierarchicalParentToChildIndex; +using DictionaryHierarchyParentToChildIndexPtr = std::shared_ptr; -class DictionaryHierarchyParentToChildIndex +class DictionaryHierarchicalParentToChildIndex { public: struct KeysRange @@ -26,7 +26,7 @@ public: UInt32 end_index; }; - explicit DictionaryHierarchyParentToChildIndex(const HashMap> & parent_to_children_map_) + explicit DictionaryHierarchicalParentToChildIndex(const HashMap> & parent_to_children_map_) { size_t parent_to_children_map_size = parent_to_children_map_.size(); @@ -35,14 +35,13 @@ public: for (auto & [parent, children] : parent_to_children_map_) { - UInt32 start_index = static_cast(keys.size()); - for (auto child : children) - { - keys.push_back(child); - } + size_t keys_size = keys.size(); + UInt32 start_index = static_cast(keys_size); + UInt32 end_index = 
start_index + static_cast(children.size()); - UInt32 end_index = static_cast(keys.size()); - parent_to_children_keys_range[parent] = DictionaryHierarchyParentToChildIndex::KeysRange{start_index, end_index}; + keys.insert(children.begin(), children.end()); + + parent_to_children_keys_range[parent] = KeysRange{start_index, end_index}; } } @@ -71,7 +70,6 @@ namespace detail bool operator()(UInt64 key [[maybe_unused]]) { return false; } }; - template struct GetParentKeyFuncInterface { std::optional operator()(UInt64 key [[maybe_unused]]) { return {}; } @@ -261,7 +259,7 @@ namespace detail template ElementsAndOffsets getDescendants( const PaddedPODArray & keys, - const DictionaryHierarchyParentToChildIndex & parent_to_child_index, + const DictionaryHierarchicalParentToChildIndex & parent_to_child_index, Strategy strategy, size_t & valid_keys) { @@ -397,7 +395,7 @@ namespace detail ++depth; - DictionaryHierarchyParentToChildIndex::KeysRange children_range = it->getMapped(); + DictionaryHierarchicalParentToChildIndex::KeysRange children_range = it->getMapped(); for (; children_range.start_index < children_range.end_index; ++children_range.start_index) { @@ -478,7 +476,7 @@ ColumnUInt8::Ptr getKeysIsInHierarchyColumn( /// @param valid_keys - number of keys that are valid in parent_to_child map ColumnPtr getKeysDescendantsArray( const PaddedPODArray & requested_keys, - const DictionaryHierarchyParentToChildIndex & parent_to_child_index, + const DictionaryHierarchicalParentToChildIndex & parent_to_child_index, size_t level, size_t & valid_keys); diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index d1e1bff1231..32c81beee6f 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -24,8 +24,8 @@ namespace ErrorCodes class IDictionary; using DictionaryPtr = std::unique_ptr; -class DictionaryHierarchyParentToChildIndex; -using DictionaryHierarchyParentToChildIndexPtr = std::shared_ptr; +class 
DictionaryHierarchicalParentToChildIndex; +using DictionaryHierarchicalParentToChildIndexPtr = std::shared_ptr; /** DictionaryKeyType provides IDictionary client information about * which key type is supported by dictionary. @@ -231,18 +231,23 @@ public: getDictionaryID().getNameForLogs()); } - virtual DictionaryHierarchyParentToChildIndexPtr getHierarchyParentToChildIndex() const + virtual DictionaryHierarchicalParentToChildIndexPtr getHierarchicalIndex() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Method getParentToChildIndex is not supported for {} dictionary.", + "Method getHierarchicalIndex is not supported for {} dictionary.", getDictionaryID().getNameForLogs()); } + virtual size_t getHierarchicalIndexBytesAllocated() const + { + return 0; + } + virtual ColumnPtr getDescendants( ColumnPtr key_column [[maybe_unused]], const DataTypePtr & key_type [[maybe_unused]], size_t level [[maybe_unused]], - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const + DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDescendants is not supported for {} dictionary.", diff --git a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp index 8d0700af8cf..a68f23a978f 100644 --- a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp +++ b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp @@ -151,7 +151,7 @@ TEST(HierarchyDictionariesUtils, getDescendants) parent_to_child[1].emplace_back(3); parent_to_child[2].emplace_back(4); - auto parent_to_child_index = std::make_shared(parent_to_child); + auto parent_to_child_index = std::make_shared(parent_to_child); PaddedPODArray keys = {0, 1, 2, 3, 4}; @@ -197,7 +197,7 @@ TEST(HierarchyDictionariesUtils, getDescendants) parent_to_child[1].emplace_back(2); parent_to_child[2].emplace_back(1); - auto 
parent_to_child_index = std::make_shared(parent_to_child); + auto parent_to_child_index = std::make_shared(parent_to_child); PaddedPODArray keys = {1, 2, 3}; diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 231b48e1dfb..fc7057bad0a 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1059,11 +1059,11 @@ public: FunctionDictGetDescendantsExecutable( String name_, size_t level_, - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index_, + DictionaryHierarchicalParentToChildIndexPtr hierarchical_parent_to_child_index, std::shared_ptr dictionary_helper_) : name(name_) , level(level_) - , parent_to_child_index(std::move(parent_to_child_index_)) + , hierarchical_parent_to_child_index(std::move(hierarchical_parent_to_child_index)) , dictionary_helper(std::move(dictionary_helper_)) {} @@ -1084,13 +1084,13 @@ public: auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level, parent_to_child_index); + ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level, hierarchical_parent_to_child_index); return result; } String name; size_t level; - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index; + DictionaryHierarchicalParentToChildIndexPtr hierarchical_parent_to_child_index; std::shared_ptr dictionary_helper; }; @@ -1102,13 +1102,13 @@ public: const DataTypes & argument_types_, const DataTypePtr & result_type_, size_t level_, - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index, + DictionaryHierarchicalParentToChildIndexPtr hierarchical_parent_to_child_index, std::shared_ptr helper_) : name(name_) , 
argument_types(argument_types_) , result_type(result_type_) , level(level_) - , parent_to_child_index(std::move(parent_to_child_index)) + , hierarchical_parent_to_child_index(std::move(hierarchical_parent_to_child_index)) , helper(std::move(helper_)) {} @@ -1122,14 +1122,14 @@ public: ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override { - return std::make_shared(name, level, parent_to_child_index, helper); + return std::make_shared(name, level, hierarchical_parent_to_child_index, helper); } String name; DataTypes argument_types; DataTypePtr result_type; size_t level; - DictionaryHierarchyParentToChildIndexPtr parent_to_child_index; + DictionaryHierarchicalParentToChildIndexPtr hierarchical_parent_to_child_index; std::shared_ptr helper; }; @@ -1177,7 +1177,7 @@ public: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { auto dictionary = dictionary_helper->getDictionary(arguments[0].column); - auto hierarchy_parent_to_child_index = dictionary->getHierarchyParentToChildIndex(); + auto hierarchical_parent_to_child_index = dictionary->getHierarchicalIndex(); size_t level = Strategy::default_level; @@ -1203,7 +1203,7 @@ public: for (auto & argument : arguments) argument_types.emplace_back(argument.type); - return std::make_shared(name, argument_types, result_type, level, hierarchy_parent_to_child_index, dictionary_helper); + return std::make_shared(name, argument_types, result_type, level, hierarchical_parent_to_child_index, dictionary_helper); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index c0d7d8cc4ed..5d6f288ab11 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -35,6 +35,7 @@ NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() 
{"attribute.names", std::make_shared(std::make_shared())}, {"attribute.types", std::make_shared(std::make_shared())}, {"bytes_allocated", std::make_shared()}, + {"hierarchical_index_bytes_allocated", std::make_shared()}, {"query_count", std::make_shared()}, {"hit_rate", std::make_shared()}, {"found_rate", std::make_shared()}, @@ -113,6 +114,7 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPt if (dict_ptr) { res_columns[i++]->insert(dict_ptr->getBytesAllocated()); + res_columns[i++]->insert(dict_ptr->getHierarchicalIndexBytesAllocated()); res_columns[i++]->insert(dict_ptr->getQueryCount()); res_columns[i++]->insert(dict_ptr->getHitRate()); res_columns[i++]->insert(dict_ptr->getFoundRate()); @@ -128,7 +130,7 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPt } else { - for (size_t j = 0; j != 9; ++j) // Number of empty fields if dict_ptr is null + for (size_t j = 0; j != 10; ++j) // Number of empty fields if dict_ptr is null res_columns[i++]->insertDefault(); } diff --git a/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.reference b/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.reference new file mode 100644 index 00000000000..8f421ba4524 --- /dev/null +++ b/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.reference @@ -0,0 +1,6 @@ +1 0 +1 +1 0 +1 +1 0 +0 diff --git a/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql b/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql new file mode 100644 index 00000000000..25e257c67d5 --- /dev/null +++ b/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql @@ -0,0 +1,66 @@ +DROP TABLE IF EXISTS test_hierarchy_source_table; +CREATE TABLE test_hierarchy_source_table +( + id UInt64, + parent_id UInt64 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_hierarchy_source_table VALUES (1, 0); + +DROP DICTIONARY IF EXISTS hierarchy_flat_dictionary_index; +CREATE DICTIONARY 
hierarchy_flat_dictionary_index +( + id UInt64, + parent_id UInt64 BIDIRECTIONAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarchy_source_table')) +LAYOUT(FLAT()) +LIFETIME(0); -- {serverError 36 } + +DROP DICTIONARY IF EXISTS hierarchy_flat_dictionary_index; +CREATE DICTIONARY hierarchy_flat_dictionary_index +( + id UInt64, + parent_id UInt64 HIERARCHICAL BIDIRECTIONAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarchy_source_table')) +LAYOUT(FLAT()) +LIFETIME(0); + +SELECT * FROM hierarchy_flat_dictionary_index; +SELECT hierarchical_index_bytes_allocated > 0 FROM system.dictionaries WHERE name = 'hierarchy_flat_dictionary_index' AND database = currentDatabase(); + +DROP DICTIONARY hierarchy_flat_dictionary_index; + +DROP DICTIONARY IF EXISTS hierarchy_hashed_dictionary_index; +CREATE DICTIONARY hierarchy_hashed_dictionary_index +( + id UInt64, + parent_id UInt64 HIERARCHICAL BIDIRECTIONAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarchy_source_table')) +LAYOUT(HASHED()) +LIFETIME(0); + +SELECT * FROM hierarchy_hashed_dictionary_index; +SELECT hierarchical_index_bytes_allocated > 0 FROM system.dictionaries WHERE name = 'hierarchy_hashed_dictionary_index' AND database = currentDatabase(); +DROP DICTIONARY hierarchy_hashed_dictionary_index; + +DROP DICTIONARY IF EXISTS hierarchy_hashed_array_dictionary_index; +CREATE DICTIONARY hierarchy_hashed_array_dictionary_index +( + id UInt64, + parent_id UInt64 HIERARCHICAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarchy_source_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(0); + +SELECT * FROM hierarchy_hashed_array_dictionary_index; +SELECT hierarchical_index_bytes_allocated > 0 FROM system.dictionaries WHERE name = 'hierarchy_hashed_array_dictionary_index' AND database = currentDatabase(); + +DROP DICTIONARY hierarchy_hashed_array_dictionary_index; From d35654226aad72f6c216ec1ebf9e7dd5e3067c03 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 13 May 2022 15:42:41 +0200 Subject:
[PATCH 413/615] Fixed tests --- .../queries/0_stateless/02117_show_create_table_system.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index ad18e38adcc..37746ef0ced 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -161,6 +161,7 @@ CREATE TABLE system.dictionaries `attribute.names` Array(String), `attribute.types` Array(String), `bytes_allocated` UInt64, + `hierarchical_index_bytes_allocated` UInt64, `query_count` UInt64, `hit_rate` Float64, `found_rate` Float64, From fa76f9af34b037b5a2e7679e69876ce09b664601 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 13 May 2022 15:46:24 +0200 Subject: [PATCH 414/615] Update tests --- .../0_stateless/02294_dictionaries_hierarchical_index.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql b/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql index 25e257c67d5..bc2a1020ab8 100644 --- a/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql +++ b/tests/queries/0_stateless/02294_dictionaries_hierarchical_index.sql @@ -64,3 +64,4 @@ SELECT * FROM hierarchy_hashed_array_dictionary_index; SELECT hierarchical_index_bytes_allocated > 0 FROM system.dictionaries WHERE name = 'hierarchy_hashed_array_dictionary_index' AND database = currentDatabase(); DROP DICTIONARY hierarchy_hashed_array_dictionary_index; +DROP TABLE test_hierarchy_source_table; From e550843d567b4bf0954c7ff9c1714c5474b6098f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 12:45:16 +0200 Subject: [PATCH 415/615] BinaryFunctionVectorized remove macro --- src/Functions/FunctionMathBinaryFloat64.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/src/Functions/FunctionMathBinaryFloat64.h b/src/Functions/FunctionMathBinaryFloat64.h index badbde280f1..aec20d30271 100644 --- a/src/Functions/FunctionMathBinaryFloat64.h +++ b/src/Functions/FunctionMathBinaryFloat64.h @@ -213,7 +213,7 @@ private: template -struct BinaryFunctionPlain +struct BinaryFunctionVectorized { static constexpr auto name = Name::name; static constexpr auto rows_per_iteration = 1; @@ -225,6 +225,4 @@ struct BinaryFunctionPlain } }; -#define BinaryFunctionVectorized BinaryFunctionPlain - } From 216184dfd34c3f7c1c2bda232114c910c578174d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 13:20:32 +0200 Subject: [PATCH 416/615] Rename one more file --- .../AzureBlobStorage/AzureObjectStorage.h | 2 +- .../registerDiskAzureBlobStorage.cpp | 2 +- .../DiskObjectStorageCommon.cpp} | 5 +++-- .../DiskObjectStorageCommon.h} | 10 ++++++--- .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 2 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/S3/diskSettings.h | 2 +- src/Disks/ObjectStorages/S3/parseConfig.h | 17 +++++++-------- .../ObjectStorages/S3/registerDiskS3.cpp | 21 ++++++++++++------- src/IO/WriteBufferFromAzureBlobStorage.cpp | 1 - 10 files changed, 36 insertions(+), 28 deletions(-) rename src/Disks/{RemoteDisksCommon.cpp => ObjectStorages/DiskObjectStorageCommon.cpp} (97%) rename src/Disks/{RemoteDisksCommon.h => ObjectStorages/DiskObjectStorageCommon.h} (85%) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 9012449e284..37c3ba72ed9 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -3,7 +3,7 @@ #if USE_AZURE_BLOB_STORAGE -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp 
b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index faaec6ee95c..ce8d906d699 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp similarity index 97% rename from src/Disks/RemoteDisksCommon.cpp rename to src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp index da6ffed5f11..eb9d7107d39 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -8,7 +8,8 @@ namespace DB { namespace ErrorCodes -{extern const int BAD_ARGUMENTS; +{ + extern const int BAD_ARGUMENTS; } std::shared_ptr wrapWithCache( diff --git a/src/Disks/RemoteDisksCommon.h b/src/Disks/ObjectStorages/DiskObjectStorageCommon.h similarity index 85% rename from src/Disks/RemoteDisksCommon.h rename to src/Disks/ObjectStorages/DiskObjectStorageCommon.h index 661d4e293df..9cc3b971865 100644 --- a/src/Disks/RemoteDisksCommon.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.h @@ -2,18 +2,22 @@ #include #include + #include #include -#include -#include #include +#include +#include namespace DB { std::shared_ptr wrapWithCache( - std::shared_ptr disk, String cache_name, String cache_path, String metadata_path); + std::shared_ptr disk, + String cache_name, + String cache_path, + String metadata_path); std::pair prepareForLocalMetadata( const String & name, diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 0041da6881d..50ed52e5c78 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -1,7 +1,7 @@ #include +#include #include #include -#include 
#include namespace DB diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 5c8287e5d84..991474f5a98 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index fc5a09ce825..b06e412b6e9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -12,9 +12,9 @@ #include #include #include +#include #include #include -#include #include namespace DB diff --git a/src/Disks/ObjectStorages/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h index 2d14ce9468b..725c86ce462 100644 --- a/src/Disks/ObjectStorages/S3/parseConfig.h +++ b/src/Disks/ObjectStorages/S3/parseConfig.h @@ -6,15 +6,14 @@ #include #include -#include -#include "Disks/DiskCacheWrapper.h" -#include "Storages/StorageS3Settings.h" -#include "ProxyConfiguration.h" -#include "ProxyListConfiguration.h" -#include "ProxyResolverConfiguration.h" -#include "Disks/DiskRestartProxy.h" -#include "Disks/DiskLocal.h" -#include "Disks/RemoteDisksCommon.h" +#include +#include +#include +#include +#include +#include +#include +#include #include diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index fe3aa78b0b3..9c9c76ad451 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -9,19 +9,24 @@ #if USE_AWS_S3 #include -#include -#include -#include + +#include + #include -#include +#include +#include + +#include +#include #include #include #include -#include -#include -#include +#include #include -#include + +#include + +#include namespace DB { diff --git a/src/IO/WriteBufferFromAzureBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp 
index 18e03b08817..51d8bf6aba2 100644 --- a/src/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -3,7 +3,6 @@ #if USE_AZURE_BLOB_STORAGE #include -#include #include #include From 6ae8a26fae92f53efac49e8c37f009a6c4febb95 Mon Sep 17 00:00:00 2001 From: metahys Date: Sun, 22 May 2022 12:00:13 +0800 Subject: [PATCH 417/615] fix deadlock during fetching part --- src/Storages/MergeTree/DataPartsExchange.cpp | 186 ++++++++++--------- 1 file changed, 98 insertions(+), 88 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 064447c54ad..8c59a1c00bc 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -453,112 +453,122 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - PooledReadWriteBufferFromHTTP in{ - uri, - Poco::Net::HTTPRequest::HTTP_POST, - {}, - timeouts, - creds, - DBMS_DEFAULT_BUFFER_SIZE, - 0, /* no redirects */ - data_settings->replicated_max_parallel_fetches_for_host - }; - - int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); - - ReservationPtr reservation; - size_t sum_files_size = 0; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) + bool retry_without_zero_copy = false; { - readBinary(sum_files_size, in); - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) + PooledReadWriteBufferFromHTTP in{ + uri, + Poco::Net::HTTPRequest::HTTP_POST, + {}, + timeouts, + creds, + DBMS_DEFAULT_BUFFER_SIZE, + 0, /* no redirects */ + data_settings->replicated_max_parallel_fetches_for_host + }; + + int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); + + ReservationPtr reservation; + size_t sum_files_size = 0; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - IMergeTreeDataPart::TTLInfos 
ttl_infos; - String ttl_infos_string; - readBinary(ttl_infos_string, in); - ReadBufferFromString ttl_infos_buffer(ttl_infos_string); - assertString("ttl format version: 1\n", ttl_infos_buffer); - ttl_infos.read(ttl_infos_buffer); - if (!disk) + readBinary(sum_files_size, in); + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { - reservation - = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); - if (!reservation) + IMergeTreeDataPart::TTLInfos ttl_infos; + String ttl_infos_string; + readBinary(ttl_infos_string, in); + ReadBufferFromString ttl_infos_buffer(ttl_infos_string); + assertString("ttl format version: 1\n", ttl_infos_buffer); + ttl_infos.read(ttl_infos_buffer); + if (!disk) + { reservation - = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); + = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); + if (!reservation) + reservation + = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); + } + } + else if (!disk) + { + reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); + if (!reservation) + reservation = data.reserveSpace(sum_files_size); } } else if (!disk) { - reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); - if (!reservation) - reservation = data.reserveSpace(sum_files_size); + /// We don't know real size of part because sender server version is too old + reservation = data.makeEmptyReservationOnLargestDisk(); } - } - else if (!disk) - { - /// We don't know real size of part because sender server version is too old - reservation = data.makeEmptyReservationOnLargestDisk(); - } - if (!disk) - disk = 
reservation->getDisk(); + if (!disk) + disk = reservation->getDisk(); - bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch - && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); + bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch + && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); - String part_type = "Wide"; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, in); + String part_type = "Wide"; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) + readStringBinary(part_type, in); - UUID part_uuid = UUIDHelpers::Nil; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) - readUUIDText(part_uuid, in); + UUID part_uuid = UUIDHelpers::Nil; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) + readUUIDText(part_uuid, in); - String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); - if (!remote_fs_metadata.empty()) - { - if (!try_zero_copy) - throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); - if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")); - if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version); - if (part_type == "InMemory") - throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); - - try + String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); + if (!remote_fs_metadata.empty()) { - return downloadPartToDiskRemoteMeta(part_name, replica_path, 
to_detached, tmp_prefix_, disk, in, throttler); - } - catch (const Exception & e) - { - if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR) - throw; + if (!try_zero_copy) + throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); + if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")); + if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version); + if (part_type == "InMemory") + throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); - LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); - /// Try again but without zero-copy - return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, - user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); + try + { + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); + } + catch (const Exception & e) + { + if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR) + throw; + + LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); + /// Try again later but without zero-copy + retry_without_zero_copy = true; + } + } + else + { + auto storage_id = data.getStorageID(); + String new_part_path = part_type == "InMemory" ? 
"memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; + auto entry = data.getContext()->getReplicatedFetchList().insert( + storage_id.getDatabaseName(), storage_id.getTableName(), + part_info.partition_id, part_name, new_part_path, + replica_path, uri, to_detached, sum_files_size); + + in.setNextCallback(ReplicatedFetchReadCallback(*entry)); + + size_t projections = 0; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) + readBinary(projections, in); + + MergeTreeData::DataPart::Checksums checksums; + return part_type == "InMemory" + ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); } } - auto storage_id = data.getStorageID(); - String new_part_path = part_type == "InMemory" ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; - auto entry = data.getContext()->getReplicatedFetchList().insert( - storage_id.getDatabaseName(), storage_id.getTableName(), - part_info.partition_id, part_name, new_part_path, - replica_path, uri, to_detached, sum_files_size); - - in.setNextCallback(ReplicatedFetchReadCallback(*entry)); - - size_t projections = 0; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - readBinary(projections, in); - - MergeTreeData::DataPart::Checksums checksums; - return part_type == "InMemory" - ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); + if (retry_without_zero_copy) + return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, + user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't fetch part and no retry. It is a bug."); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( From 70cc27ecac6ac5999b09b8436aec707aa51ee084 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 14:08:15 +0200 Subject: [PATCH 418/615] Test with different element types --- tests/performance/norm_distance.xml | 84 +++++++++++++++++++---------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index bd9db76554c..804c3cac4d4 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -1,14 +1,29 @@ + + + + element_type + + UInt8 + Int16 + Int32 + Int64 + Float32 + Float64 + + + + - CREATE TABLE vecs_d ( - v Array(Float32) + CREATE TABLE vecs_{element_type} ( + v Array({element_type}) ) ENGINE=Memory; - - INSERT INTO vecs_d + + INSERT INTO vecs_{element_type} SELECT v FROM ( SELECT number AS n, @@ -25,45 +40,58 @@ rand(n*10+9) ] AS v FROM system.numbers - LIMIT 30000000 + LIMIT 10000000 ); - + - CREATE TABLE tuples_d ( - t Tuple(Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32) + CREATE TABLE tuples_{element_type} ( + t Tuple( + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type} + ) ) ENGINE=Memory; - - INSERT 
INTO tuples_d - SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_d; - + + INSERT INTO tuples_{element_type} + SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type}; + + - select sum(dist) from (select L1Norm(t) as dist from tuples_d) - select sum(dist) from (select L2Norm(t) as dist from tuples_d) - select sum(dist) from (select LinfNorm(t) as dist from tuples_d) + SELECT sum(dist) FROM (SELECT L1Norm(t) AS dist FROM tuples_{element_type}) + SELECT sum(dist) FROM (SELECT L2Norm(t) AS dist FROM tuples_{element_type}) + SELECT sum(dist) FROM (SELECT LinfNorm(t) AS dist FROM tuples_{element_type}) - select sum(dist) from (select L1Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) - select sum(dist) from (select L2Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) - select sum(dist) from (select LinfDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) - select sum(dist) from (select cosineDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L1Distance(a, t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L2Distance(a, t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT LinfDistance(a, t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - select sum(dist) from (select arrayL1Norm(v) as dist from vecs_d) - select sum(dist) from (select arrayL2Norm(v) as dist from vecs_d) - select sum(dist) from (select arrayLinfNorm(v) as dist from vecs_d) + SELECT sum(dist) FROM (SELECT arrayL1Norm(v) AS dist FROM vecs_{element_type}) + SELECT sum(dist) FROM (SELECT arrayL2Norm(v) AS dist FROM 
vecs_{element_type}) + SELECT sum(dist) FROM (SELECT arrayLinfNorm(v) AS dist FROM vecs_{element_type}) - select sum(dist) from (select arrayL1Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) - select sum(dist) from (select arrayL2Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) - select sum(dist) from (select arrayLinfDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) - select sum(dist) from (select arrayCosineDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL1Distance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL2Distance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayLinfDistance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type}) + + DROP TABLE vecs_{element_type} + DROP TABLE tuples_{element_type} - DROP TABLE vecs_d - DROP TABLE tuples_d From 41cb5b4afce2c4ffc5373f1dd940c2044f25bc6f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 14:35:26 +0200 Subject: [PATCH 419/615] RangeHashedDictionary added test --- ...nge_hashed_dictionary_range_cast.reference | 2 ++ ...311_range_hashed_dictionary_range_cast.sql | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.reference create mode 100644 tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.sql diff --git a/tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.reference b/tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.reference new file mode 100644 index 00000000000..95a5cf09f70 --- /dev/null +++ 
b/tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.reference @@ -0,0 +1,2 @@ +Value +Value diff --git a/tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.sql b/tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.sql new file mode 100644 index 00000000000..623b369da38 --- /dev/null +++ b/tests/queries/0_stateless/02311_range_hashed_dictionary_range_cast.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS dictionary_source_table; +CREATE TABLE dictionary_source_table +( + key UInt64, + start UInt64, + end UInt64, + value String +) Engine = TinyLog; + +INSERT INTO dictionary_source_table values (1, 0, 18446744073709551615, 'Value'); + +DROP DICTIONARY IF EXISTS range_hashed_dictionary; +CREATE DICTIONARY range_hashed_dictionary +( + key UInt64, + start UInt64, + end UInt64, + value String +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table')) +LAYOUT(RANGE_HASHED()) +RANGE(MIN start MAX end) +LIFETIME(0); + +SELECT dictGet('range_hashed_dictionary', 'value', toUInt64(1), toUInt64(18446744073709551615)); +SELECT dictGet('range_hashed_dictionary', 'value', toUInt64(1), toUInt64(-1)); + +DROP DICTIONARY range_hashed_dictionary; +DROP TABLE dictionary_source_table; From fe21b4ca9e0bf420fcec7a8d97c1eef05ed1560e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 14:41:07 +0200 Subject: [PATCH 420/615] Fixed style check --- src/Functions/FunctionsExternalDictionaries.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index fc7057bad0a..fc1bb07bda7 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1061,7 +1061,7 @@ public: size_t level_, DictionaryHierarchicalParentToChildIndexPtr hierarchical_parent_to_child_index, std::shared_ptr dictionary_helper_) - : name(name_) + : name(std::move(name_)) , level(level_) , 
hierarchical_parent_to_child_index(std::move(hierarchical_parent_to_child_index)) , dictionary_helper(std::move(dictionary_helper_)) @@ -1104,7 +1104,7 @@ public: size_t level_, DictionaryHierarchicalParentToChildIndexPtr hierarchical_parent_to_child_index, std::shared_ptr helper_) - : name(name_) + : name(std::move(name_)) , argument_types(argument_types_) , result_type(result_type_) , level(level_) @@ -1200,7 +1200,7 @@ public: DataTypes argument_types; argument_types.reserve(arguments.size()); - for (auto & argument : arguments) + for (const auto & argument : arguments) argument_types.emplace_back(argument.type); return std::make_shared(name, argument_types, result_type, level, hierarchical_parent_to_child_index, dictionary_helper); From 94772f9cfc857bb93bab0e66765359d75895d3df Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 14:43:13 +0200 Subject: [PATCH 421/615] Added performance tests --- .../performance/hierarchical_dictionaries.xml | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/performance/hierarchical_dictionaries.xml diff --git a/tests/performance/hierarchical_dictionaries.xml b/tests/performance/hierarchical_dictionaries.xml new file mode 100644 index 00000000000..e6030886a1c --- /dev/null +++ b/tests/performance/hierarchical_dictionaries.xml @@ -0,0 +1,55 @@ + + + + dictionary_layout + + flat + hashed + hashed_array + + + + + func + + dictGetHierarchy + dictGetDescendants + + + + + + CREATE TABLE hierarchical_dictionary_source_table + ( + id UInt64, + parent_id UInt64 + ) ENGINE = Memory; + + + + CREATE DICTIONARY hierarchical_{dictionary_layout}_dictionary + ( + id UInt64, + parent_id UInt64 HIERARCHICAL + ) + PRIMARY KEY id + SOURCE(CLICKHOUSE(DB 'default' TABLE 'hierarchical_dictionary_source_table')) + LAYOUT({dictionary_layout}()) + LIFETIME(0); + + + + INSERT INTO hierarchical_dictionary_source_table + SELECT number, rand64() % 250000 + FROM system.numbers + LIMIT 500000; + + + + SELECT 
{func}('hierarchical_{dictionary_layout}_dictionary', id) FROM hierarchical_{dictionary_layout}_dictionary FORMAT Null; + + + DROP TABLE IF EXISTS hierarchical_dictionary_source_table; + DROP DICTIONARY IF EXISTS hierarchical_{dictionary_layout}_dictionary; + + From e76597e5d39d4cc8edbef649551416a1ba8dd6c5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 14:46:21 +0200 Subject: [PATCH 422/615] Update src/Core/Settings.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f563260c48e..bdbf9be56fc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -86,7 +86,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ - M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Make sense for debug only.", 0) \ + M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. 
Makes sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ From cef3d96cfe29040f0313aded0197488be13c8dae Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 14:46:25 +0200 Subject: [PATCH 423/615] Update src/Disks/ObjectStorages/S3/S3ObjectStorage.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index dc939e5e9dd..9c6c7664ffb 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -79,7 +79,7 @@ public: const WriteSettings & write_settings = {}) override; void listPrefix(const std::string & path, BlobsPathToSize & children) const override; - /// Remove file. Throws exception if file doesn't exists or it's a directory. + /// Remove file. Throws exception if file doesn't exist or it's a directory. 
void removeObject(const std::string & path) override; void removeObjects(const std::vector & paths) override; From dabb150a95bfe974b6fd9fe979db31a6eccad4ef Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 23 May 2022 13:05:59 +0000 Subject: [PATCH 424/615] Fix cast lowcard of nullable in JoinSwitcher --- src/Interpreters/JoinSwitcher.cpp | 2 +- .../0_stateless/02302_join_auto_lc_nullable_bug.reference | 1 + .../queries/0_stateless/02302_join_auto_lc_nullable_bug.sql | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.reference create mode 100644 tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 480d105ebb6..34c8bb4cfd5 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -66,7 +66,7 @@ void JoinSwitcher::switchJoin() for (const auto & sample_column : right_sample_block) { positions.emplace_back(tmp_block.getPositionByName(sample_column.name)); - is_nullable.emplace_back(sample_column.type->isNullable()); + is_nullable.emplace_back(JoinCommon::isNullable(sample_column.type)); } } diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.reference b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql new file mode 100644 index 00000000000..7f7285d5472 --- /dev/null +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -0,0 +1,6 @@ + +SET max_bytes_in_join = '100', join_algorithm = 'auto'; + +SELECT 3 == count() FROM (SELECT toLowCardinality(toNullable(number)) AS l FROM system.numbers LIMIT 3) AS s1 +ANY LEFT JOIN 
(SELECT toLowCardinality(toNullable(number)) AS r FROM system.numbers LIMIT 4) AS s2 ON l = r +; From a0aa841909baefe639acf150c87eff1329c2bff7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 23 May 2022 15:15:45 +0200 Subject: [PATCH 425/615] Fix failed thread stateless tests --- src/Storages/StorageSnapshot.cpp | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 07c4c794210..d935d73d03d 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -92,10 +92,32 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; - auto columns_description = getDescriptionForColumns(column_names); - for (const auto & column : columns_description) - res.insert({column.type->createColumn(), column.type, column.name}); - + const auto & columns = getMetadataForQuery()->getColumns(); + for (const auto & name : column_names) + { + auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + if (column && !object_column) + { + res.insert({column->type->createColumn(), column->type, column->name}); + } + else if (object_column) + { + res.insert({object_column->type->createColumn(), object_column->type, object_column->name}); + } + else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + { + /// Virtual columns must be appended after ordinary, because user can + /// override them. 
+ const auto & type = it->second; + res.insert({type->createColumn(), type, name}); + } + else + { + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); + } + } return res; } From 49b0fde46c4a3f04229d08213397b491620ff80b Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Mon, 23 May 2022 09:43:26 -0400 Subject: [PATCH 426/615] add ClickHouse Keeper to docs --- .../operations/settings/merge-tree-settings.md | 8 ++++---- docs/en/operations/settings/settings.md | 6 +++--- .../system-tables/distributed_ddl_queue.md | 2 +- docs/en/operations/system-tables/mutations.md | 2 +- docs/en/operations/system-tables/replicas.md | 16 ++++++++-------- .../system-tables/replication_queue.md | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 01177d6aae4..b672da83441 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -114,7 +114,7 @@ A large number of parts in a table reduces performance of ClickHouse queries and ## replicated_deduplication_window {#replicated-deduplication-window} -The number of most recently inserted blocks for which Zookeeper stores hash sums to check for duplicates. +The number of most recently inserted blocks for which ClickHouse Keeper stores hash sums to check for duplicates. Possible values: @@ -123,7 +123,7 @@ Possible values: Default value: 100. -The `Insert` command creates one or more blocks (parts). When inserting into Replicated tables, ClickHouse for [insert deduplication](../../engines/table-engines/mergetree-family/replication/) writes the hash sums of the created parts into Zookeeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from Zookeeper. 
+The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication/), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper. A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries. The hash sum is calculated from the composition of the field names and types and the data of the inserted part (stream of bytes). @@ -142,7 +142,7 @@ A deduplication mechanism is used, similar to replicated tables (see [replicated ## replicated_deduplication_window_seconds {#replicated-deduplication-window-seconds} -The number of seconds after which the hash sums of the inserted blocks are removed from Zookeeper. +The number of seconds after which the hash sums of the inserted blocks are removed from ClickHouse Keeper. Possible values: @@ -150,7 +150,7 @@ Possible values: Default value: 604800 (1 week). -Similar to [replicated_deduplication_window](#replicated-deduplication-window), `replicated_deduplication_window_seconds` specifies how long to store hash sums of blocks for insert deduplication. Hash sums older than `replicated_deduplication_window_seconds` are removed from Zookeeper, even if they are less than ` replicated_deduplication_window`. +Similar to [replicated_deduplication_window](#replicated-deduplication-window), `replicated_deduplication_window_seconds` specifies how long to store hash sums of blocks for insert deduplication. Hash sums older than `replicated_deduplication_window_seconds` are removed from ClickHouse Keeper, even if they are less than ` replicated_deduplication_window`. 
## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 76fbc5f239d..1249c3d7a95 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1838,7 +1838,7 @@ Usage By default, deduplication is not performed for materialized views but is done upstream, in the source table. If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable the insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table. -At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself, +At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with ClickHouse Keeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. 
On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself, ignoring check result for the source table, and will insert rows lost because of the first failure. ## insert_deduplication_token {#insert_deduplication_token} @@ -2459,7 +2459,7 @@ Default value: 0. ## merge_selecting_sleep_ms {#merge_selecting_sleep_ms} -Sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to Zookeeper in large-scale clusters. +Sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. Possible values: @@ -2607,7 +2607,7 @@ Default value: 128. ## background_fetches_pool_size {#background_fetches_pool_size} -Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recommended to use default value. +Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or a slow ZooKeeper cluster, it is recommended to use the default value. 
Possible values: diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 0597972197d..ac2663bba19 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -15,7 +15,7 @@ Columns: - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. - `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). -- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper). **Example** diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 507146d93de..6d74a0de9c3 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -8,7 +8,7 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied. -- `mutation_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ZooKeeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. +- `mutation_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper or ZooKeeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. 
- `command` ([String](../../sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index 6ec0f184e15..c65b0d294b0 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -62,13 +62,13 @@ Columns: Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. - `can_become_leader` (`UInt8`) - Whether the replica can be a leader. - `is_readonly` (`UInt8`) - Whether the replica is in read-only mode. - This mode is turned on if the config does not have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper. -- `is_session_expired` (`UInt8`) - the session with ZooKeeper has expired. Basically the same as `is_readonly`. + This mode is turned on if the config does not have sections with ClickHouse Keeper, if an unknown error occurred when reinitializing sessions in ClickHouse Keeper, and during session reinitialization in ClickHouse Keeper. +- `is_session_expired` (`UInt8`) - the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`. - `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet. - `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged. -- `zookeeper_path` (`String`) - Path to table data in ZooKeeper. -- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names. -- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper_path/replicas/replica_path’. 
+- `zookeeper_path` (`String`) - Path to table data in ClickHouse Keeper. +- `replica_name` (`String`) - Replica name in ClickHouse Keeper. Different replicas of the same table have different names. +- `replica_path` (`String`) - Path to replica data in ClickHouse Keeper. The same as concatenating ‘zookeeper_path/replicas/replica_path’. - `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet. - `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`. - `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong. @@ -86,12 +86,12 @@ The next 4 columns have a non-zero value only where there is an active session w - `last_queue_update` (`DateTime`) - When the queue was updated last time. - `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has. - `total_replicas` (`UInt8`) - The total number of known replicas of this table. -- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas). +- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas). - `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions. -- `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ZooKeeper. 
+- `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper. - `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active. -If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row. +If you request all the columns, the table may work a bit slowly, since several reads from ClickHouse Keeper are made for each row. If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly. For example, you can check that everything is working correctly like this: diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index a8a51162dae..834f4a04757 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -1,6 +1,6 @@ # replication_queue {#system_tables-replication_queue} -Contains information about tasks from replication queues stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. +Contains information about tasks from replication queues stored in ClickHouse Keeper, or ZooKeeper, for tables in the `ReplicatedMergeTree` family. Columns: @@ -8,11 +8,11 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. -- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ZooKeeper. Different replicas of the same table have different names. +- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ClickHouse Keeper. Different replicas of the same table have different names. - `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Position of the task in the queue. 
-- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ZooKeeper. +- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ClickHouse Keeper. - `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue, one of: From 1b17086266ed6cf146b0103f66ef9d4417cc5e72 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:01:44 +0200 Subject: [PATCH 427/615] Update src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 43c402a4508..3c638d94d3d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -11,7 +11,7 @@ class DiskObjectStorage; /// so it's possible to recover from this remote information in case of local disk loss. /// /// This mechanism can be enabled with `true` option inside -/// disk configuration. Implemented only for S3 and Azure Block storage. Other object storages +/// disk configuration. Implemented only for S3 and Azure Blob storage. Other object storages /// doesn't support metadata for blobs. /// /// FIXME: this class is very intrusive and use a lot of DiskObjectStorage internals. 
From 1a78ea75bb9f113aa61b834b38afae8e0f7cb369 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:01:51 +0200 Subject: [PATCH 428/615] Update src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 3c638d94d3d..89153e4a39c 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -12,7 +12,7 @@ class DiskObjectStorage; /// /// This mechanism can be enabled with `true` option inside /// disk configuration. Implemented only for S3 and Azure Blob storage. Other object storages -/// doesn't support metadata for blobs. +/// don't support metadata for blobs. /// /// FIXME: this class is very intrusive and use a lot of DiskObjectStorage internals. /// FIXME: it's very complex and unreliable, need to implement something better. 
From 2064934e5928b138564dbeffe14ba476f483150a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:02:29 +0200 Subject: [PATCH 429/615] Update src/Disks/ObjectStorages/DiskObjectStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 163e2087bed..c235e1a864a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -423,8 +423,9 @@ void DiskObjectStorage::removeMetadata(const String & path, std::vector { for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) { - paths_to_remove.push_back(fs::path(remote_fs_root_path) / remote_fs_object_path); - object_storage->removeFromCache(fs::path(remote_fs_root_path) / remote_fs_object_path); + String object_path = fs::path(remote_fs_root_path) / remote_fs_object_path; + paths_to_remove.push_back(object_path); + object_storage->removeFromCache(object_path); } return false; From b9bd3cb49f9aa5042ef78aaa33136e81dd057908 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Mon, 23 May 2022 10:06:16 -0400 Subject: [PATCH 430/615] update tips for ClickHouse Keeper --- docs/en/operations/tips.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 26dc59d72ba..a0a0391fb09 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -128,7 +128,7 @@ You should never use manually written scripts to transfer data between different If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters. -Do not run ZooKeeper on the same servers as ClickHouse. 
Because ZooKeeper is very sensitive for latency and ClickHouse may utilize all available system resources. +You can run ClickHouse Keeper on the same server as ClickHouse, but do not run ZooKeeper on the same servers as ClickHouse. Because ZooKeeper is very sensitive for latency and ClickHouse may utilize all available system resources. You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers. From ce4adb447f18df89b85618b2a722405dc6ba87c4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 14:21:08 +0000 Subject: [PATCH 431/615] Fix named tuples output in ORC/Arrow/Parquet formats --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 1 - .../Formats/Impl/CHColumnToArrowColumn.cpp | 16 +++++++++------- .../Formats/Impl/ORCBlockOutputFormat.cpp | 18 ++++++++---------- .../Formats/Impl/ORCBlockOutputFormat.h | 2 +- ...12_parquet_orc_arrow_names_tuples.reference | 9 +++++++++ .../02312_parquet_orc_arrow_names_tuples.sql | 13 +++++++++++++ 6 files changed, 40 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference create mode 100644 tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 7c5dd2a03ea..c792d828e44 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -36,7 +36,6 @@ #include #include - /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. 
#define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index bd5a6368291..e3cc896466b 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -215,14 +214,16 @@ namespace DB std::unordered_map> & dictionary_values) { const auto * column_tuple = assert_cast(column.get()); - const auto & nested_types = assert_cast(column_type.get())->getElements(); + const auto * type_tuple = assert_cast(column_type.get()); + const auto & nested_types = type_tuple->getElements(); + const auto & nested_names = type_tuple->getElementNames(); arrow::StructBuilder & builder = assert_cast(*array_builder); for (size_t i = 0; i != column_tuple->tupleSize(); ++i) { ColumnPtr nested_column = column_tuple->getColumnPtr(i); - fillArrowArray(column_name + "." + std::to_string(i), nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, output_string_as_string, dictionary_values); + fillArrowArray(column_name + "." + nested_names[i], nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, output_string_as_string, dictionary_values); } for (size_t i = start; i != end; ++i) @@ -661,14 +662,15 @@ namespace DB if (isTuple(column_type)) { - const auto & nested_types = assert_cast(column_type.get())->getElements(); + const auto & tuple_type = assert_cast(column_type.get()); + const auto & nested_types = tuple_type->getElements(); + const auto & nested_names = tuple_type->getElementNames(); const auto * tuple_column = assert_cast(column.get()); std::vector> nested_fields; for (size_t i = 0; i != nested_types.size(); ++i) { - String name = column_name + "." 
+ std::to_string(i); - auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, output_string_as_string, out_is_column_nullable); - nested_fields.push_back(std::make_shared(name, nested_arrow_type, *out_is_column_nullable)); + auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), nested_names[i], format_name, output_string_as_string, out_is_column_nullable); + nested_fields.push_back(std::make_shared(nested_names[i], nested_arrow_type, *out_is_column_nullable)); } return arrow::struct_(nested_fields); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index aaa3e8fe976..5e979c3d35a 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -55,7 +55,7 @@ ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & hea data_types.push_back(recursiveRemoveLowCardinality(type)); } -ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type, const std::string & column_name) +ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type) { switch (type->getTypeId()) { @@ -106,12 +106,12 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t } case TypeIndex::Nullable: { - return getORCType(removeNullable(type), column_name); + return getORCType(removeNullable(type)); } case TypeIndex::Array: { const auto * array_type = assert_cast(type.get()); - return orc::createListType(getORCType(array_type->getNestedType(), column_name)); + return orc::createListType(getORCType(array_type->getNestedType())); } case TypeIndex::Decimal32: { @@ -131,21 +131,19 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t case TypeIndex::Tuple: { const auto * tuple_type = assert_cast(type.get()); + const auto & nested_names = tuple_type->getElementNames(); const auto & nested_types = 
tuple_type->getElements(); auto struct_type = orc::createStructType(); for (size_t i = 0; i < nested_types.size(); ++i) - { - String name = column_name + "." + std::to_string(i); - struct_type->addStructField(name, getORCType(nested_types[i], name)); - } + struct_type->addStructField(nested_names[i], getORCType(nested_types[i])); return struct_type; } case TypeIndex::Map: { const auto * map_type = assert_cast(type.get()); return orc::createMapType( - getORCType(map_type->getKeyType(), column_name), - getORCType(map_type->getValueType(), column_name) + getORCType(map_type->getKeyType()), + getORCType(map_type->getValueType()) ); } default: @@ -514,7 +512,7 @@ void ORCBlockOutputFormat::prepareWriter() options.setCompression(orc::CompressionKind::CompressionKind_NONE); size_t columns_count = header.columns(); for (size_t i = 0; i != columns_count; ++i) - schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]), header.safeGetByPosition(i).name)); + schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]))); writer = orc::createWriter(*schema, &output_stream, options); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index f69fd1c0aab..d4a19353915 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -42,7 +42,7 @@ private: void consume(Chunk chunk) override; void finalizeImpl() override; - ORC_UNIQUE_PTR getORCType(const DataTypePtr & type, const std::string & column_name); + ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); /// ConvertFunc is needed for type UInt8, because firstly UInt8 (char8_t) must be /// converted to unsigned char (bugprone-signed-char-misuse in clang). 
diff --git a/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference new file mode 100644 index 00000000000..bda59765af0 --- /dev/null +++ b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference @@ -0,0 +1,9 @@ +(1,2) +(2,3) +(3,4) +(1,2) +(2,3) +(3,4) +(1,2) +(2,3) +(3,4) diff --git a/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql new file mode 100644 index 00000000000..54a7d8a11f1 --- /dev/null +++ b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql @@ -0,0 +1,13 @@ +create table parquet_02312 (x Tuple(a UInt32, b UInt32)) engine=File(Parquet); +insert into parquet_02312 values ((1,2)), ((2,3)), ((3,4)); +select * from parquet_02312; +drop table parquet_02312; +create table parquet_02312 (x Tuple(a UInt32, b UInt32)) engine=File(Arrow); +insert into parquet_02312 values ((1,2)), ((2,3)), ((3,4)); +select * from parquet_02312; +drop table parquet_02312; +create table parquet_02312 (x Tuple(a UInt32, b UInt32)) engine=File(ORC); +insert into parquet_02312 values ((1,2)), ((2,3)), ((3,4)); +select * from parquet_02312; +drop table parquet_02312; + From 1e1e6d4fa0a6ee55f765903d0d0261a131d75bee Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 23 May 2022 16:24:06 +0200 Subject: [PATCH 432/615] Review fixes --- .../registerDiskAzureBlobStorage.cpp | 6 ++--- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 1 - .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 5 ++-- src/Disks/ObjectStorages/IObjectStorage.h | 4 ++-- .../ObjectStorages/S3/S3ObjectStorage.cpp | 23 +++++++++++++++---- src/IO/ReadBufferFromAzureBlobStorage.h | 4 ++-- 6 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 
ce8d906d699..92ba6e426b3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -72,15 +72,13 @@ void registerDiskAzureBlobStorage(DiskFactory & factory) { auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); - + /// FIXME Cache currently unsupported :( ObjectStoragePtr azure_object_storage = std::make_unique( - std::move(cache), + nullptr, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context)); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 82dad1dece0..4574b8cb52c 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -60,7 +60,6 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } - /// Open the file for write and return WriteBufferFromFileBase object. 
std::unique_ptr HDFSObjectStorage::writeObject( /// NOLINT const std::string & path, WriteMode mode, diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 50ed52e5c78..04862e43c65 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -31,9 +31,8 @@ void registerDiskHDFS(DiskFactory & factory) config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), context_->getSettingsRef().hdfs_replication ); - FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context_); - - ObjectStoragePtr hdfs_storage = std::make_unique(std::move(cache), uri, std::move(settings), config); + /// FIXME Cache currently unsupported :( + ObjectStoragePtr hdfs_storage = std::make_unique(nullptr, uri, std::move(settings), config); auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index b96ddeb2de1..b9ac497f54f 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -72,14 +72,14 @@ public: /// at least size of object virtual ObjectMetadata getObjectMetadata(const std::string & path) const = 0; - /// Read single path from object storage, don't use cache + /// Read single path from object storage virtual std::unique_ptr readObject( /// NOLINT const std::string & path, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; - /// Read multiple objects with common prefix, use cache + /// Read multiple objects with common prefix virtual std::unique_ptr readObjects( /// NOLINT const std::string & common_path_prefix, const BlobsPathToSize & blobs_to_read, diff --git 
a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 991474f5a98..8988a456f52 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -139,7 +139,16 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT std::optional) const { auto settings_ptr = s3_settings.get(); - return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, read_settings); + ReadSettings disk_read_settings{read_settings}; + if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + + disk_read_settings.remote_fs_cache = cache; + } + + return std::make_unique(client.get(), bucket, path, version_id, settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); } @@ -190,12 +199,13 @@ void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & chi auto result = outcome.GetResult(); auto objects = result.GetContents(); - for (const auto & object : objects) - children.emplace_back(object.GetKey(), object.GetSize()); if (objects.empty()) break; + for (const auto & object : objects) + children.emplace_back(object.GetKey(), object.GetSize()); + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); } while (outcome.GetResult().GetIsTruncated()); } @@ -249,7 +259,8 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); + if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + throwIfError(outcome); } } @@ -265,7 +276,9 @@ void S3ObjectStorage::removeObjectIfExists(const std::string & path) Aws::S3::Model::DeleteObjectsRequest request; request.SetBucket(bucket); request.SetDelete(delkeys); - 
client_ptr->DeleteObjects(request); + auto outcome = client_ptr->DeleteObjects(request); + if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + throwIfError(outcome); } void S3ObjectStorage::removeObjectsIfExist(const std::vector & paths) diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index b7459ccead1..e5a39b84d45 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -24,10 +24,10 @@ public: size_t max_single_download_retries_, size_t tmp_buffer_size_, bool use_external_buffer_ = false, - size_t read_until_position_ = 0 - ); + size_t read_until_position_ = 0); off_t seek(off_t off, int whence) override; + off_t getPosition() override; bool nextImpl() override; From d40b0461c570359521efec51a84d6cc0d64b27b1 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 14:25:43 +0000 Subject: [PATCH 433/615] Update test --- ...2_parquet_orc_arrow_names_tuples.reference | 3 ++ .../02312_parquet_orc_arrow_names_tuples.sql | 40 +++++++++++++------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference index bda59765af0..4697d53a23b 100644 --- a/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference +++ b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.reference @@ -7,3 +7,6 @@ (1,2) (2,3) (3,4) +[[(1),(2),(3)]] +[[(1),(2),(3)]] +[[(1),(2),(3)]] diff --git a/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql index 54a7d8a11f1..4c2158e4a0c 100644 --- a/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql +++ b/tests/queries/0_stateless/02312_parquet_orc_arrow_names_tuples.sql @@ -1,13 +1,29 @@ -create table parquet_02312 (x Tuple(a UInt32, b UInt32)) 
engine=File(Parquet); -insert into parquet_02312 values ((1,2)), ((2,3)), ((3,4)); -select * from parquet_02312; -drop table parquet_02312; -create table parquet_02312 (x Tuple(a UInt32, b UInt32)) engine=File(Arrow); -insert into parquet_02312 values ((1,2)), ((2,3)), ((3,4)); -select * from parquet_02312; -drop table parquet_02312; -create table parquet_02312 (x Tuple(a UInt32, b UInt32)) engine=File(ORC); -insert into parquet_02312 values ((1,2)), ((2,3)), ((3,4)); -select * from parquet_02312; -drop table parquet_02312; +-- Tags: no-fasttest + +drop table if exists test_02312; +create table test_02312 (x Tuple(a UInt32, b UInt32)) engine=File(Parquet); +insert into test_02312 values ((1,2)), ((2,3)), ((3,4)); +select * from test_02312; +drop table test_02312; +create table test_02312 (x Tuple(a UInt32, b UInt32)) engine=File(Arrow); +insert into test_02312 values ((1,2)), ((2,3)), ((3,4)); +select * from test_02312; +drop table test_02312; +create table test_02312 (x Tuple(a UInt32, b UInt32)) engine=File(ORC); +insert into test_02312 values ((1,2)), ((2,3)), ((3,4)); +select * from test_02312; +drop table test_02312; + +create table test_02312 (a Nested(b Nested(c UInt32))) engine=File(Parquet); +insert into test_02312 values ([[(1), (2), (3)]]); +select * from test_02312; +drop table test_02312; +create table test_02312 (a Nested(b Nested(c UInt32))) engine=File(Arrow); +insert into test_02312 values ([[(1), (2), (3)]]); +select * from test_02312; +drop table test_02312; +create table test_02312 (a Nested(b Nested(c UInt32))) engine=File(ORC); +insert into test_02312 values ([[(1), (2), (3)]]); +select * from test_02312; +drop table test_02312; From c4f3d8bce1ad97c58ccb670bd53877d37d61ae77 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 15:24:58 +0000 Subject: [PATCH 434/615] Fix test --- tests/queries/0_stateless/02293_formats_json_columns.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 20eba0449d8..74d9a4f5aab 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -50,7 +50,7 @@ echo ' $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" $CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1" echo ' From 034c7122be4968b3aaf64d6a0b9053bec71da7eb Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 15:26:01 +0000 Subject: [PATCH 435/615] Mark JSONColumns supports subset of columns --- src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp index 935462a6fe4..22264d01a57 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp @@ -54,6 +54,7 @@ void registerInputFormatJSONColumns(FormatFactory & factory) return std::make_shared(buf, sample, settings, std::make_unique(buf)); } ); + factory.markFormatSupportsSubsetOfColumns("JSONColumns"); } void registerJSONColumnsSchemaReader(FormatFactory & factory) From e91e7fdba714affc0dce96a8283f98848fb8d6f6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 23 May 2022 15:33:29 +0000 Subject: [PATCH 436/615] Fix style --- 
src/Coordination/KeeperServer.cpp | 2 +- .../integration/test_keeper_force_recovery_single_node/test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index ec2cb0b2b84..d74ad173811 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -500,7 +500,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ raft_instance->forceReconfigure(config); // Single node cluster doesn't need to wait for any other nodes - // so we can finish recovering immediatelly after applying + // so we can finish recovering immediately after applying // new configuration if (config->get_servers().size() == 1) finish_recovering(); diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index f833cf96197..1e58a25221e 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -21,7 +21,7 @@ def get_nodes(): f"node{i+1}", main_configs=[ f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml" + f"configs/use_keeper.xml", ], stay_alive=True, ) From 2658a9eeebf58ebefd2f1f5d53097d1a5284134d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 17:01:19 +0200 Subject: [PATCH 437/615] Test with max_threads=1 --- tests/performance/norm_distance.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 804c3cac4d4..5311ee194ed 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -68,6 +68,9 @@ SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type}; + + 1 + From d0f5551c9f836860a8f0646ecf8df237a4d92015 Mon Sep 17 00:00:00 2001 From: Alexander 
Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 18:27:41 +0200 Subject: [PATCH 438/615] Parameterized with norm kind --- tests/performance/norm_distance.xml | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 5311ee194ed..b6a7f9724c2 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -72,26 +72,26 @@ 1 + + + + norm + + L1 + L2 + Linf + + + + - - SELECT sum(dist) FROM (SELECT L1Norm(t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT L2Norm(t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT LinfNorm(t) AS dist FROM tuples_{element_type}) - - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L1Distance(a, t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L2Distance(a, t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT LinfDistance(a, t) AS dist FROM tuples_{element_type}) + SELECT sum(dist) FROM (SELECT {norm}Norm(t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, t) AS dist FROM tuples_{element_type}) WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - - SELECT sum(dist) FROM (SELECT arrayL1Norm(v) AS dist FROM vecs_{element_type}) - SELECT sum(dist) FROM (SELECT arrayL2Norm(v) AS dist FROM vecs_{element_type}) - SELECT sum(dist) FROM (SELECT arrayLinfNorm(v) AS dist FROM vecs_{element_type}) - - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL1Distance(a, v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM 
vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL2Distance(a, v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayLinfDistance(a, v) AS dist FROM vecs_{element_type}) + SELECT sum(dist) FROM (SELECT array{norm}Norm(v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT array{norm}Distance(a, v) AS dist FROM vecs_{element_type}) WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type}) DROP TABLE vecs_{element_type} From 3651ef93fe0a26e79dd5bca719e1684218246657 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 23 May 2022 17:42:13 +0000 Subject: [PATCH 439/615] Fix performance test --- src/Processors/Formats/Impl/BinaryRowInputFormat.cpp | 1 - tests/performance/formats_columns_sampling.xml | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index ced8bd63c6f..d3de2fbf494 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -114,7 +114,6 @@ void registerInputFormatRowBinary(FormatFactory & factory) }; registerWithNamesAndTypes("RowBinary", register_func); - factory.markFormatSupportsSubsetOfColumns("RowBinaryWithNamesAndTypes"); factory.registerFileExtension("bin", "RowBinary"); } diff --git a/tests/performance/formats_columns_sampling.xml b/tests/performance/formats_columns_sampling.xml index f5dd4395de1..25f9dc000a3 100644 --- a/tests/performance/formats_columns_sampling.xml +++ b/tests/performance/formats_columns_sampling.xml @@ -8,13 +8,11 @@ format TabSeparatedWithNames - TabSeparatedRawWithNames CustomSeparatedWithNames CSVWithNames JSONEachRow JSONCompactEachRowWithNames TSKV - RowBinaryWithNamesAndTypes Avro ORC Parquet From 
a262492cc1007128c71b47d7d7b6d7275ba49846 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 23 May 2022 20:53:33 +0200 Subject: [PATCH 440/615] slightly better --- .../InterpreterTransactionControlQuery.cpp | 2 ++ src/Interpreters/MergeTreeTransaction.cpp | 8 ++++---- src/Interpreters/MergeTreeTransaction.h | 5 ++++- src/Interpreters/TransactionLog.cpp | 13 +++++++++++-- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index bdcc351c32b..1e4868788ba 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -84,6 +84,8 @@ BlockIO InterpreterTransactionControlQuery::executeCommit(ContextMutablePtr sess throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction {} was rolled back", txn->tid); if (txn->getState() != MergeTreeTransaction::COMMITTED) throw Exception(ErrorCodes::LOGICAL_ERROR, "Transaction {} has invalid state {}", txn->tid, txn->getState()); + + csn = txn->getCSN(); } /// Wait for committed changes to become actually visible, so the next transaction in this session will see the changes diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index 11287f5de97..cab40f3c6db 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ b/src/Interpreters/MergeTreeTransaction.cpp @@ -47,15 +47,15 @@ MergeTreeTransaction::State MergeTreeTransaction::getState() const return COMMITTED; } -bool MergeTreeTransaction::waitStateChange(CSN expected_state_csn) const +bool MergeTreeTransaction::waitStateChange(CSN current_state_csn) const { - CSN current_value = expected_state_csn; - while (current_value == expected_state_csn && !TransactionLog::instance().isShuttingDown()) + CSN current_value = current_state_csn; + while (current_value == current_state_csn && !TransactionLog::instance().isShuttingDown()) { 
csn.wait(current_value); current_value = csn.load(); } - return current_value != expected_state_csn; + return current_value != current_state_csn; } void MergeTreeTransaction::checkIsNotCancelled() const diff --git a/src/Interpreters/MergeTreeTransaction.h b/src/Interpreters/MergeTreeTransaction.h index f466262cb2e..309b8e3eeff 100644 --- a/src/Interpreters/MergeTreeTransaction.h +++ b/src/Interpreters/MergeTreeTransaction.h @@ -56,7 +56,10 @@ public: Float64 elapsedSeconds() const { return elapsed.elapsedSeconds(); } - bool waitStateChange(CSN expected_state_csn) const; + /// Waits for transaction state to become not equal to the state corresponding to current_state_csn + bool waitStateChange(CSN current_state_csn) const; + + CSN getCSN() const { return csn; } private: scope_guard beforeCommit(); diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 699190e2d6f..7b141ef4219 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -227,8 +227,15 @@ void TransactionLog::runUpdatingThread() if (connection_loss) { auto new_zookeeper = global_context->getZooKeeper(); - std::lock_guard lock{mutex}; - zookeeper = new_zookeeper; + { + std::lock_guard lock{mutex}; + zookeeper = new_zookeeper; + } + + /// It's possible that we connected to different [Zoo]Keeper instance + /// so we may read a bit stale state. Run some writing request before loading log entries + /// to make that instance up-to-date. + zookeeper->set(zookeeper_path_log, ""); } loadNewEntries(); @@ -446,6 +453,8 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool return Tx::CommittingCSN; } + /// Do not allow exceptions between commit point and the and of transaction finalization + /// (otherwise it may stuck in COMMITTING state holding snapshot). 
NOEXCEPT_SCOPE; /// FIXME Transactions: Sequential node numbers in ZooKeeper are Int32, but 31 bit is not enough for production use /// (overflow is possible in a several weeks/months of active usage) From fa21121f77ce2250b568441177a0608c488b1d2e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 23 May 2022 21:17:52 +0200 Subject: [PATCH 441/615] fix --- src/Interpreters/TransactionLog.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 7b141ef4219..4f0e79297b8 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -240,9 +240,7 @@ void TransactionLog::runUpdatingThread() loadNewEntries(); removeOldEntries(); - - if (connection_loss || fault_probability_before_commit || fault_probability_after_commit) - tryFinalizeUnknownStateTransactions(); + tryFinalizeUnknownStateTransactions(); } catch (const Coordination::Exception &) { From 1b3df3931116161846049a514d124fd68c6ebdad Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 23 May 2022 18:09:43 +0300 Subject: [PATCH 442/615] tests: fix table in 01710_projection_aggregation_in_order Signed-off-by: Azat Khuzhin Follow-up for: #37342 --- .../0_stateless/01710_projection_aggregation_in_order.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql index 557bd297436..add38dbd3f8 100644 --- a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql @@ -55,5 +55,5 @@ FROM numbers(100000); SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; -WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; -WITH 
toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; From 293295815cfad337a4af29e0113d3159784bbf6a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 23 May 2022 23:44:05 +0200 Subject: [PATCH 443/615] Fixed tests --- tests/queries/0_stateless/02311_normalize_utf8_constant.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02311_normalize_utf8_constant.sql b/tests/queries/0_stateless/02311_normalize_utf8_constant.sql index b7c9693b6b8..2747aa073ec 100644 --- a/tests/queries/0_stateless/02311_normalize_utf8_constant.sql +++ b/tests/queries/0_stateless/02311_normalize_utf8_constant.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + SELECT 'â' AS s, normalizeUTF8NFC(s) s1, From 838a6c6f61fd743440be68461f3d9e1a7bf56360 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 16 May 2022 22:09:23 +0000 Subject: [PATCH 444/615] Column declaration: [NOT] NULL right after type + fixed: data_type_default_nullable=true, it didn't make columns nullable if the column declaration contains default expression w/o type Issue #37229 --- src/Interpreters/InterpreterCreateQuery.cpp | 5 ++ src/Parsers/ParserCreateQuery.h | 37 +++++++++-- .../01269_create_with_null.reference | 6 +- .../0_stateless/01269_create_with_null.sql | 7 ++- ...mn_decl_null_before_defaul_value.reference | 22 +++++++ ...2_column_decl_null_before_defaul_value.sql | 61 +++++++++++++++++++ 6 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.reference create mode 100644 tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql diff --git 
a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index df53333b635..304cfa2f3f4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -578,7 +578,12 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.type) column.type = name_type_it->type; else + { column.type = defaults_sample_block.getByName(column.name).type; + /// set nullability for case of column declaration w/o type but with default expression + if ((col_decl.null_modifier && *col_decl.null_modifier) || make_columns_nullable) + column.type = makeNullable(column.type); + } column.default_desc.kind = columnDefaultKindFromString(col_decl.default_specifier); column.default_desc.expression = default_expr; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 29cd08554b5..daf27c0dc67 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -105,9 +105,9 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool require_type = true; - bool allow_null_modifiers = false; - bool check_keywords_after_name = false; + const bool require_type = true; + const bool allow_null_modifiers = false; + const bool check_keywords_after_name = false; /// just for ALTER TABLE ALTER COLUMN use bool check_type_keyword = false; }; @@ -175,7 +175,22 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr ttl_expression; ASTPtr collation_expression; - if (!s_default.checkWithoutMoving(pos, expected) + auto null_check_without_moving = [&]() -> bool + { + if (!allow_null_modifiers) + return false; + + if (s_null.checkWithoutMoving(pos, expected)) + return true; + + Pos before_null = pos; + bool res = s_not.check(pos, expected) && s_null.checkWithoutMoving(pos, expected); + pos = before_null; + return res; + }; + + if (!null_check_without_moving() + && !s_default.checkWithoutMoving(pos, expected) && 
!s_materialized.checkWithoutMoving(pos, expected) && !s_ephemeral.checkWithoutMoving(pos, expected) && !s_alias.checkWithoutMoving(pos, expected) @@ -195,6 +210,18 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E } } + if (allow_null_modifiers) + { + if (s_not.check(pos, expected)) + { + if (!s_null.check(pos, expected)) + return false; + null_modifier.emplace(false); + } + else if (s_null.check(pos, expected)) + null_modifier.emplace(true); + } + Pos pos_before_specifier = pos; if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected)) { @@ -230,7 +257,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type - if (type && allow_null_modifiers) + if ((type || default_expression) && allow_null_modifiers && !null_modifier.has_value()) { if (s_not.ignore(pos, expected)) { diff --git a/tests/queries/0_stateless/01269_create_with_null.reference b/tests/queries/0_stateless/01269_create_with_null.reference index 73f834da75a..4e52c4a42d6 100644 --- a/tests/queries/0_stateless/01269_create_with_null.reference +++ b/tests/queries/0_stateless/01269_create_with_null.reference @@ -1,7 +1,7 @@ Nullable(Int32) Int32 Nullable(Int32) Int32 CREATE TABLE default.data_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Int32\n)\nENGINE = Memory -Nullable(Int32) Int32 Nullable(Int32) Nullable(Int32) -CREATE TABLE default.set_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Nullable(Int32)\n)\nENGINE = Memory -CREATE TABLE default.set_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Nullable(Int32)\n)\nENGINE = Memory +Nullable(Int32) Int32 Nullable(Int32) Nullable(Int32) Nullable(UInt8) +CREATE TABLE default.set_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Nullable(Int32),\n `f` Nullable(UInt8) DEFAULT 
1\n)\nENGINE = Memory +CREATE TABLE default.set_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Nullable(Int32),\n `f` Nullable(UInt8) DEFAULT 1\n)\nENGINE = Memory CREATE TABLE default.cannot_be_nullable\n(\n `n` Nullable(Int8),\n `a` Array(UInt8)\n)\nENGINE = Memory CREATE TABLE default.cannot_be_nullable\n(\n `n` Nullable(Int8),\n `a` Array(UInt8)\n)\nENGINE = Memory diff --git a/tests/queries/0_stateless/01269_create_with_null.sql b/tests/queries/0_stateless/01269_create_with_null.sql index 7548070ce4b..ac57f613dfd 100644 --- a/tests/queries/0_stateless/01269_create_with_null.sql +++ b/tests/queries/0_stateless/01269_create_with_null.sql @@ -39,13 +39,14 @@ CREATE TABLE set_null ( a INT NULL, b INT NOT NULL, c Nullable(INT), - d INT + d INT, + f DEFAULT 1 ) engine=Memory(); -INSERT INTO set_null VALUES (NULL, 2, NULL, NULL); +INSERT INTO set_null VALUES (NULL, 2, NULL, NULL, NULL); -SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d) FROM set_null; +SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d), toTypeName(f) FROM set_null; SHOW CREATE TABLE set_null; DETACH TABLE set_null; diff --git a/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.reference b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.reference new file mode 100644 index 00000000000..2079872ee73 --- /dev/null +++ b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.reference @@ -0,0 +1,22 @@ +create table, column +type +NULL +id Nullable(Int32) +create table, column +type +NOT NULL +id Int32 +create table, column +type +NULL +DEFAULT +id Nullable(Int32) DEFAULT 1 +create table, column +type +NOT NULL +DEFAULT +id Int32 DEFAULT 1 +create table, column +type +DEFAULT +NULL +id Nullable(Int32) DEFAULT 1 +create table, column +type +DEFAULT +NOT NULL +id Int32 DEFAULT 1 +create table, column -type +NULL +DEFAULT +id Nullable(UInt8) DEFAULT 1 +create table, column -type +NOT NULL +DEFAULT 
+id UInt8 DEFAULT 1 +create table, column -type +DEFAULT +NULL +id Nullable(UInt8) DEFAULT 1 +create table, column -type +DEFAULT +NOT NULL +id UInt8 DEFAULT 1 +alter column, NULL modifier is not allowed +modify column, NULL modifier is not allowed diff --git a/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql new file mode 100644 index 00000000000..3825df1e557 --- /dev/null +++ b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql @@ -0,0 +1,61 @@ +select 'create table, column +type +NULL'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT NULL) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column +type +NOT NULL'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column +type +NULL +DEFAULT'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT NULL DEFAULT 1) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column +type +NOT NULL +DEFAULT'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT NOT NULL DEFAULT 1) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column +type +DEFAULT +NULL'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT DEFAULT 1 NULL) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column +type +DEFAULT +NOT NULL'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT DEFAULT 1 NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column -type +NULL +DEFAULT'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id NULL DEFAULT 1) ENGINE=MergeTree() ORDER BY tuple(); 
+DESCRIBE TABLE null_before; + +select 'create table, column -type +NOT NULL +DEFAULT'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id NOT NULL DEFAULT 1) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column -type +DEFAULT +NULL'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id DEFAULT 1 NULL) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'create table, column -type +DEFAULT +NOT NULL'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id DEFAULT 1 NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); +DESCRIBE TABLE null_before; + +select 'alter column, NULL modifier is not allowed'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); +ALTER TABLE null_before ALTER COLUMN id TYPE INT NULL; -- { clientError SYNTAX_ERROR } + +select 'modify column, NULL modifier is not allowed'; +DROP TABLE IF EXISTS null_before SYNC; +CREATE TABLE null_before (id INT NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); +ALTER TABLE null_before MODIFY COLUMN id NULL DEFAULT 1; -- { serverError UNKNOWN_TYPE } + +DROP TABLE IF EXISTS null_before SYNC; From ee0a8f32519a5715eafa01877b2b7ad42ef290f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 01:37:17 +0200 Subject: [PATCH 445/615] Fix exception messages --- programs/su/su.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 490e966955f..0bd2778209c 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -55,19 +55,19 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) group * result{}; if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name, specified in the CLICKHOUSE_SETGID environment variable ({})", arg_gid), 
ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); if (!result) - throw Exception("Group {} specified in the CLICKHOUSE_SETGID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Group {} is not found in the system", ErrorCodes::BAD_ARGUMENTS); gid = entry.gr_gid; } if (gid == 0) - throw Exception("Group specified in the CLICKHOUSE_SETGID environment variable has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Group has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); if (0 != setgid(gid)) - throwFromErrno(fmt::format("Cannot do 'setgid' to user, specified in the CLICKHOUSE_SETGID environment variable ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot do 'setgid' to user ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); } if (!arg_uid.empty()) @@ -80,19 +80,19 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) passwd * result{}; if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result)) - throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name, specified in the CLICKHOUSE_SETUID environment variable ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); if (!result) - throw Exception("User {} specified in the CLICKHOUSE_SETUID environment variable is not found in the system", ErrorCodes::BAD_ARGUMENTS); + throw Exception("User {} is not found in the system", ErrorCodes::BAD_ARGUMENTS); uid = entry.pw_uid; } if (uid == 0) - throw Exception("User specified in the CLICKHOUSE_SETUID environment variable has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); + throw Exception("User has id 0, but 
dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); if (0 != setuid(uid)) - throwFromErrno(fmt::format("Cannot do 'setuid' to user, specified in the CLICKHOUSE_SETUID environment variable ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot do 'setuid' to user ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); } } From e8c08cda0dd5d394c5a7b58911a7148f245f94a6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 01:38:17 +0200 Subject: [PATCH 446/615] Fix error in static-files-disk-uploader --- .../static-files-disk-uploader/static-files-disk-uploader.cpp | 2 +- programs/su/su.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp index a10c25c3342..07c066b0d59 100644 --- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -200,6 +200,6 @@ try } catch (...) { - std::cerr << DB::getCurrentExceptionMessage(false); + std::cerr << DB::getCurrentExceptionMessage(false) << '\n'; return 1; } diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 0bd2778209c..90f1f47b4b9 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -133,6 +133,6 @@ try } catch (...) 
{ - std::cerr << DB::getCurrentExceptionMessage(false); + std::cerr << DB::getCurrentExceptionMessage(false) << '\n'; return 1; } From 5b1bdfc3530abae8ae830e2426b6c691f5c49db4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 02:02:18 +0200 Subject: [PATCH 447/615] Fix error --- programs/su/su.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 90f1f47b4b9..7a108f3baef 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,13 @@ try setUserAndGroup(std::move(user), std::move(group)); - execvp(argv[0], &argv[2]); + std::vector new_argv; + new_argv.reserve(argc - 1); + new_argv.insert(new_argv.begin(), argv + 2, argv + argc); + new_argv.push_back(nullptr); + + execvp(new_argv.front(), new_argv.data()); + throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR); } catch (...) From 3b0ecb46209f36da855c4157373bf2fb30e0dd3f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 03:55:28 +0300 Subject: [PATCH 448/615] Update cmake-in-clickhouse.md --- docs/en/development/cmake-in-clickhouse.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md index 65d280df902..a2ea99ecb67 100644 --- a/docs/en/development/cmake-in-clickhouse.md +++ b/docs/en/development/cmake-in-clickhouse.md @@ -13,11 +13,6 @@ cmake .. 
\ -DCMAKE_C_COMPILER=$(which clang-13) \ -DCMAKE_CXX_COMPILER=$(which clang++-13) \ -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_CLICKHOUSE_ALL=OFF \ - -DENABLE_CLICKHOUSE_SERVER=ON \ - -DENABLE_CLICKHOUSE_CLIENT=ON \ - -DENABLE_LIBRARIES=OFF \ - -DUSE_UNWIND=ON \ -DENABLE_UTILS=OFF \ -DENABLE_TESTS=OFF ``` From 98138112c9d39cdc0dcd0fce8b2371f584fc452d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 03:55:51 +0300 Subject: [PATCH 449/615] Update cmake_in_clickhouse_header.md --- docs/_includes/cmake_in_clickhouse_header.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md index 02019f13964..c35668c2c40 100644 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ b/docs/_includes/cmake_in_clickhouse_header.md @@ -9,11 +9,6 @@ cmake .. \ -DCMAKE_C_COMPILER=$(which clang-13) \ -DCMAKE_CXX_COMPILER=$(which clang++-13) \ -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_CLICKHOUSE_ALL=OFF \ - -DENABLE_CLICKHOUSE_SERVER=ON \ - -DENABLE_CLICKHOUSE_CLIENT=ON \ - -DENABLE_LIBRARIES=OFF \ - -DUSE_UNWIND=ON \ -DENABLE_UTILS=OFF \ -DENABLE_TESTS=OFF ``` From 29e6a7ed8c1cecea588693aa9e4b98bdba77e21e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 04:11:32 +0200 Subject: [PATCH 450/615] Keep controls in place when the page is scrolled horizontally --- programs/server/play.html | 50 +++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 06fc5d8de9a..6b530790ad0 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -81,6 +81,8 @@ { height: 100%; margin: 0; + /* This enables position: sticky on controls */ + overflow: auto; } html @@ -89,9 +91,26 @@ font-family: Liberation Sans, DejaVu Sans, sans-serif, Noto Color Emoji, Apple Color Emoji, Segoe UI Emoji; background: var(--background-color); color: var(--text-color); + } + + body + { 
+ /* This element will show scroll-bar on overflow, and the scroll-bar will be outside of the padding. */ padding: 0.5rem; } + #controls + { + /* Make enough space for even huge queries. */ + height: 20%; + /* When a page will be scrolled horizontally due to large table size, keep controls in place. */ + position: sticky; + left: 0; + /* This allows query textarea to occupy the remaining height while other elements have fixed height. */ + display: flex; + flex-direction: column; + } + /* Otherwise Webkit based browsers will display ugly border on focus. */ textarea, input, button { @@ -129,8 +148,7 @@ #query_div { - /* Make enough space for even huge queries. */ - height: 20%; + height: 100%; } #query @@ -380,19 +398,21 @@ -
- -
-
- -
-
- -  (Ctrl/Cmd+Enter) - - - - 🌑🌞 +
+
+ +
+
+ +
+
+ +  (Ctrl/Cmd+Enter) + + + + 🌑🌞 +
From dd2b2380fad0732eee5f4549394ff4f9487695b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 04:22:51 +0200 Subject: [PATCH 451/615] Add a comment #37078 --- contrib/jemalloc-cmake/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 39ee7fd079d..c59b4da890b 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -170,6 +170,12 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) if (USE_UNWIND) + # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. + # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. + # At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. + + # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). 
+ target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) target_link_libraries (_jemalloc PRIVATE unwind) endif () From fd7642b6aa5a968c76928c1d4b2652607ef13dc0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 06:04:48 +0200 Subject: [PATCH 452/615] Fix "splitted" build --- programs/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 77a91dfc3ae..3e7a49515c6 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -323,8 +323,7 @@ if (CLICKHOUSE_SPLIT_BINARY) clickhouse-obfuscator clickhouse-git-import clickhouse-copier - clickhouse-static-files-disk-uploader - clickhouse-su) + clickhouse-static-files-disk-uploader) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) From 983e52cd3f974c2b27725825c907c150f7567cd9 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 24 May 2022 12:08:42 +0800 Subject: [PATCH 453/615] Aggresive filter pushdown for join --- .../QueryPlan/Optimizations/filterPushDown.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 9b7eed9f5ee..2625bf38bf7 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -77,8 +77,7 @@ static size_t tryAddNewFilterStep( /// New filter column is the first one. 
auto split_filter_column_name = (*split_filter->getIndex().begin())->result_name; node.step = std::make_unique( - node.children.at(0)->step->getOutputStream(), - std::move(split_filter), std::move(split_filter_column_name), true); + node.children.at(0)->step->getOutputStream(), std::move(split_filter), std::move(split_filter_column_name), true); return 3; } @@ -194,13 +193,13 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes /// Push down is for left table only. We need to update JoinStep for push down into right. /// Only inner and left join are supported. Other types may generate default values for left table keys. /// So, if we push down a condition like `key != 0`, not all rows may be filtered. - if (table_join.oneDisjunct() && (table_join.kind() == ASTTableJoin::Kind::Inner || table_join.kind() == ASTTableJoin::Kind::Left)) + if (table_join.kind() == ASTTableJoin::Kind::Inner || table_join.kind() == ASTTableJoin::Kind::Left) { const auto & left_header = join->getInputStreams().front().header; const auto & res_header = join->getOutputStream().header; Names allowed_keys; - const auto & key_names_left = table_join.getOnlyClause().key_names_left; - for (const auto & name : key_names_left) + const auto & source_columns = left_header.getNames(); + for (const auto & name : source_columns) { /// Skip key if it is renamed. /// I don't know if it is possible. Just in case. 
From 76ddb39d02c15446a76962c97ceb91b1283ce789 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 24 May 2022 12:09:00 +0800 Subject: [PATCH 454/615] refactor format --- src/Interpreters/ActionsDAG.cpp | 22 +++++++++---------- .../QueryPlan/Optimizations/optimizeTree.cpp | 8 +++---- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index f796a55ff72..2fc9b51674f 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -997,8 +997,8 @@ void ActionsDAG::addMaterializingOutputActions() const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node) { - FunctionOverloadResolverPtr func_builder_materialize = std::make_unique( - std::make_shared()); + FunctionOverloadResolverPtr func_builder_materialize + = std::make_unique(std::make_shared()); const auto & name = node.result_name; const auto * func = &addFunction(func_builder_materialize, {&node}, {}); @@ -1102,7 +1102,8 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( const auto * left_arg = dst_node; FunctionCastBase::Diagnostic diagnostic = {dst_node->result_name, res_elem.name}; - FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); NodeRawConstPtrs children = { left_arg, right_arg }; dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}); @@ -1150,7 +1151,8 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) { auto adding_column_action = std::make_shared(); - FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); + FunctionOverloadResolverPtr func_builder_materialize + = std::make_unique(std::make_shared()); auto column_name = column.name; const auto * column_node = 
&adding_column_action->addColumn(std::move(column)); @@ -1612,7 +1614,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere std::stack stack; std::unordered_set visited_nodes; - stack.push(Frame{.node = predicate}); + stack.push({.node = predicate}); visited_nodes.insert(predicate); while (!stack.empty()) { @@ -1798,9 +1800,8 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( { Node * predicate = const_cast(tryFindInIndex(filter_name)); if (!predicate) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", - filter_name, dumpDAG()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", filter_name, dumpDAG()); /// If condition is constant let's do nothing. /// It means there is nothing to push down or optimization was already applied. @@ -1870,8 +1871,6 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( index_node = new_predicate; } } - - removeUnusedActions(false); } else { @@ -1926,10 +1925,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( predicate->function_base = predicate->function_builder->build(arguments); predicate->function = predicate->function_base->prepare(arguments); } - - removeUnusedActions(false); } + removeUnusedActions(false); return actions; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 9a8dd151830..ff30cfd8cf3 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -34,7 +34,7 @@ void optimizeTree(const QueryPlanOptimizationSettings & settings, QueryPlan::Nod }; std::stack stack; - stack.push(Frame{.node = &root}); + stack.push({.node = &root}); size_t max_optimizations_to_apply = settings.max_optimizations_to_apply; size_t total_applied_optimizations = 0; @@ -50,10 +50,10 @@ void 
optimizeTree(const QueryPlanOptimizationSettings & settings, QueryPlan::Nod /// Traverse all children first. if (frame.next_child < frame.node->children.size()) { - stack.push(Frame + stack.push( { - .node = frame.node->children[frame.next_child], - .depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0, + .node = frame.node->children[frame.next_child], + .depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0, }); ++frame.next_child; From 6e49b76cfddbcf30467f17896227618e3b228faa Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Tue, 24 May 2022 06:33:31 +0530 Subject: [PATCH 455/615] try suppress h3 asan errors --- src/Functions/h3GetUnidirectionalEdge.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index 4981e973e36..978cb3d8d65 100644 --- a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -10,8 +10,6 @@ #include #include #include -#include -#include #include @@ -94,12 +92,21 @@ public: { const UInt64 origin = data_hindex_origin[row]; const UInt64 dest = data_hindex_dest[row]; - const UInt64 res = cellsToDirectedEdge(origin, dest); + UInt64 res = getUnidirectionalEdge(origin, dest); dst_data[row] = res; } return dst; } + + /// suppress asan errors generated by the following: + /// 'NEW_ADJUSTMENT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:142:24 + /// 'NEW_DIGIT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:121:24 + __attribute__((no_sanitize_address)) static inline UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) + { + const UInt64 res = cellsToDirectedEdge(origin, dest); + return res; + } }; } From 65fbda436ad7813306ba3650581ba6adc2a35a06 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 20 May 2022 19:37:20 +0200 Subject: [PATCH 456/615] Do computations on the raw input data 
without copying to Eigen::Matrix --- src/Functions/array/arrayDistance.cpp | 342 ++++++++++-------- src/Functions/array/arrayNorm.cpp | 167 ++++----- .../02282_array_distance.reference | 8 +- 3 files changed, 287 insertions(+), 230 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index a533cb2c0cc..0e45a5f61c1 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,8 +8,6 @@ #include #include -#include - namespace DB { namespace ErrorCodes @@ -18,40 +17,98 @@ namespace ErrorCodes extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; } -template -struct LpDistance +struct L1Distance { - static inline String name = "L" + std::to_string(N); - template - static void compute(const Eigen::MatrixX & left, const Eigen::MatrixX & right, PaddedPODArray & array) + static inline String name = "L1"; + + template + struct State { - auto norms = (left - right).colwise().template lpNorm(); - array.reserve(norms.size()); - // array.insert() failed to work with Eigen iterators - for (auto n : norms) - array.push_back(n); + FloatType sum = 0; + }; + + template + static void accumulate(State & state, FirstArgType x, SecondArgType y) + { + state.sum += fabs(x - y); + } + + template + static ResultType finalize(const State & state) + { + return state.sum; } }; -struct LinfDistance : LpDistance +struct L2Distance { - static inline String name = "Linf"; + static inline String name = "L2"; + + template + struct State + { + FloatType sum = 0; + }; + + template + static void accumulate(State & state, FirstArgType x, SecondArgType y) + { + state.sum += (x - y) * (x - y); + } + + template + static ResultType finalize(const State & state) + { + return sqrt(state.sum); + } }; +struct LinfDistance +{ + static inline String name = "Linf"; + + template + struct State + { + FloatType dist = 0; + }; + + template + static void accumulate(State & 
state, FirstArgType x, SecondArgType y) + { + state.dist = fmax(state.dist, fabs(x - y)); + } + + template + static ResultType finalize(const State & state) + { + return state.dist; + } +}; struct CosineDistance { static inline String name = "Cosine"; - template - static void compute(const Eigen::MatrixX & left, const Eigen::MatrixX & right, PaddedPODArray & array) + + template + struct State { - auto prod = left.cwiseProduct(right).colwise().sum(); - auto nx = left.colwise().norm(); - auto ny = right.colwise().norm(); - auto nm = nx.cwiseProduct(ny).cwiseInverse(); - auto dist = 1.0 - prod.cwiseProduct(nm).array(); - array.reserve(dist.size()); - for (auto d : dist) - array.push_back(d); + FloatType dot_prod = 0; + FloatType x_squared = 0; + FloatType y_squared = 0; + }; + + template + static void accumulate(State & state, FirstArgType x, SecondArgType y) + { + state.dot_prod += x * y; + state.x_squared += x * x; + state.y_squared += y * y; + } + + template + static ResultType finalize(const State & state) + { + return 1 - state.dot_prod / sqrt(state.x_squared * state.y_squared); } }; @@ -102,7 +159,88 @@ public: } ColumnPtr - executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + switch (result_type->getTypeId()) + { + case TypeIndex::Float32: + return executeWithResultType(arguments, input_rows_count); + break; + case TypeIndex::Float64: + return executeWithResultType(arguments, input_rows_count); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); + } + } + + +#define SUPPORTED_TYPES(action) \ + action(UInt8) \ + action(UInt16) \ + action(UInt32) \ + action(UInt64) \ + action(Int8) \ + action(Int16) \ + action(Int32) \ + action(Int64) \ + action(Float32) \ + action(Float64) + + +private: + template + ColumnPtr 
executeWithResultType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + DataTypePtr type_x = typeid_cast(arguments[0].type.get())->getNestedType(); + + /// Dynamic disaptch based on the 1st argument type + switch (type_x->getTypeId()) + { + #define ON_TYPE(type) \ + case TypeIndex::type: \ + return executeWithFirstType(arguments, input_rows_count); \ + break; + + SUPPORTED_TYPES(ON_TYPE) + #undef ON_TYPE + + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} has nested type {}. " + "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), type_x->getName()); + } + } + + template + ColumnPtr executeWithFirstType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + DataTypePtr type_y = typeid_cast(arguments[1].type.get())->getNestedType(); + + /// Dynamic disaptch based on the 2nd argument type + switch (type_y->getTypeId()) + { + #define ON_TYPE(type) \ + case TypeIndex::type: \ + return executeWithTypes(arguments, input_rows_count); \ + break; + + SUPPORTED_TYPES(ON_TYPE) + #undef ON_TYPE + + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} has nested type {}. 
" + "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), type_y->getName()); + } + } + + template + ColumnPtr executeWithTypes(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { DataTypePtr type_x = typeid_cast(arguments[0].type.get())->getNestedType(); DataTypePtr type_y = typeid_cast(arguments[1].type.get())->getNestedType(); @@ -110,136 +248,52 @@ public: ColumnPtr col_x = arguments[0].column->convertToFullColumnIfConst(); ColumnPtr col_y = arguments[1].column->convertToFullColumnIfConst(); - const auto * arr_x = assert_cast(col_x.get()); - const auto * arr_y = assert_cast(col_y.get()); + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); - auto result = result_type->createColumn(); - switch (result_type->getTypeId()) + const auto & data_x = typeid_cast &>(array_x.getData()).getData(); + const auto & data_y = typeid_cast &>(array_y.getData()).getData(); + + const auto & offsets_x = array_x.getOffsets(); + const auto & offsets_y = array_y.getOffsets(); + + /// Check that all arrays in both columns are the sames size + for (size_t row = 0; row < offsets_x.size(); ++row) { - case TypeIndex::Float32: - executeWithType(*arr_x, *arr_y, type_x, type_y, result); - break; - case TypeIndex::Float64: - executeWithType(*arr_x, *arr_y, type_x, type_y, result); - break; - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); - } - return result; - } - -private: - template - void executeWithType( - const ColumnArray & array_x, - const ColumnArray & array_y, - const DataTypePtr & type_x, - const DataTypePtr & type_y, - MutableColumnPtr & column) const - { - Eigen::MatrixX mx, my; - columnToMatrix(array_x, type_x, mx); - columnToMatrix(array_y, type_y, my); - - if (mx.rows() && my.rows() && mx.rows() != my.rows()) - { - throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, - "Arguments of function {} have different array 
sizes: {} and {}", - getName(), mx.rows(), my.rows()); - } - auto & data = assert_cast &>(*column).getData(); - Kernel::compute(mx, my, data); - } - - template - void columnToMatrix(const ColumnArray & array, const DataTypePtr & nested_type, Eigen::MatrixX & mat) const - { - const auto & offsets = array.getOffsets(); - size_t cols = offsets.size(); - size_t rows = cols > 0 ? offsets.front() : 0; - - ColumnArray::Offset prev = 0; - for (ColumnArray::Offset off : offsets) - { - if (off - prev != rows) + if (unlikely(offsets_x[row] != offsets_y[row])) + { + ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0; throw Exception( ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, - "Arrays in a column passed to function {} have different sizes: {} and {}", - getName(), rows, off - prev); - prev = off; - } - - switch (nested_type->getTypeId()) - { - case TypeIndex::UInt8: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::UInt16: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::UInt32: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::UInt64: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::Int8: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::Int16: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::Int32: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::Int64: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::Float32: - fillMatrix(mat, array, rows, cols); - break; - case TypeIndex::Float64: - fillMatrix(mat, array, rows, cols); - break; - default: - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Arguments of function {} has nested type {}. 
" - "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - getName(), nested_type->getName()); - } - } - - // optimize for float/ double - template - requires std::is_same_v - void fillMatrix(Eigen::MatrixX & mat, const ColumnArray & array, size_t rows, size_t cols) const - { - const auto & data = typeid_cast &>(array.getData()).getData(); - mat = Eigen::Map>(data.data(), rows, cols); - } - - template - void fillMatrix(Eigen::MatrixX & mat, const ColumnArray & array, size_t rows, size_t cols) const - { - const auto & data = typeid_cast &>(array.getData()).getData(); - mat.resize(rows, cols); - for (size_t col = 0; col < cols; ++col) - { - for (size_t row = 0; row < rows; ++row) - { - size_t off = col * rows; - mat(row, col) = static_cast(data[off + row]); + "Arguments of function {} have different array sizes: {} and {}", + getName(), offsets_x[row] - prev_offset, offsets_y[row] - prev_offset); } } + + auto result = ColumnVector::create(input_rows_count); + auto & result_data = result->getData(); + + /// Do the actual computation + ColumnArray::Offset prev = 0; + size_t row = 0; + for (auto off : offsets_x) + { + typename Kernel::template State state; + for (; prev < off; ++prev) + { + Kernel::accumulate(state, data_x[prev], data_y[prev]); + } + result_data[row] = Kernel::finalize(state); + row++; + } + return result; } }; void registerFunctionArrayDistance(FunctionFactory & factory) { - factory.registerFunction>>(); - factory.registerFunction>>(); + factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 20fe85d7491..587c65a49ca 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include @@ -7,8 +9,6 @@ #include #include -#include - namespace DB { namespace ErrorCodes @@ -17,26 
+17,59 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -struct LpNorm +struct L1Norm { - static inline String name = "L" + std::to_string(N); - template - static void compute(const std::vector> & vec, PaddedPODArray & array) + static inline String name = "L1"; + + template + inline static ResultType accumulate(ResultType result, ArgumentType value) { - array.reserve(vec.size()); - for (const auto & v : vec) - { - array.push_back(v.template lpNorm()); - } + return result + fabs(value); + } + + template + inline static ResultType finalize(ResultType result) + { + return result; } }; -struct LinfNorm : LpNorm +struct L2Norm +{ + static inline String name = "L2"; + + template + inline static ResultType accumulate(ResultType result, ArgumentType value) + { + return result + value * value; + } + + template + inline static ResultType finalize(ResultType result) + { + return sqrt(result); + } +}; + + +struct LinfNorm { static inline String name = "Linf"; + + template + inline static ResultType accumulate(ResultType result, ArgumentType value) + { + return fmax(result, fabs(value)); + } + + template + inline static ResultType finalize(ResultType result) + { + return result; + } }; + template class FunctionArrayNorm : public IFunction { @@ -84,72 +117,53 @@ public: } ColumnPtr - executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { DataTypePtr type = typeid_cast(arguments[0].type.get())->getNestedType(); ColumnPtr column = arguments[0].column->convertToFullColumnIfConst(); const auto * arr = assert_cast(column.get()); - auto result = result_type->createColumn(); switch (result_type->getTypeId()) { case TypeIndex::Float32: - executeWithType(*arr, type, result); + return executeWithResultType(*arr, type, input_rows_count); break; case 
TypeIndex::Float64: - executeWithType(*arr, type, result); + return executeWithResultType(*arr, type, input_rows_count); break; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); } - return result; } private: - template - void executeWithType(const ColumnArray & array, const DataTypePtr & type, MutableColumnPtr & column) const - { - std::vector> vec; - columnToVectors(array, type, vec); - auto & data = assert_cast &>(*column).getData(); - Kernel::compute(vec, data); - } - template - void columnToVectors(const ColumnArray & array, const DataTypePtr & nested_type, std::vector> & vec) const +#define SUPPORTED_TYPES(action) \ + action(UInt8) \ + action(UInt16) \ + action(UInt32) \ + action(UInt64) \ + action(Int8) \ + action(Int16) \ + action(Int32) \ + action(Int64) \ + action(Float32) \ + action(Float64) + + + template + ColumnPtr executeWithResultType(const ColumnArray & array, const DataTypePtr & nested_type, size_t input_rows_count) const { switch (nested_type->getTypeId()) { - case TypeIndex::UInt8: - fillVectors(vec, array); - break; - case TypeIndex::UInt16: - fillVectors(vec, array); - break; - case TypeIndex::UInt32: - fillVectors(vec, array); - break; - case TypeIndex::UInt64: - fillVectors(vec, array); - break; - case TypeIndex::Int8: - fillVectors(vec, array); - break; - case TypeIndex::Int16: - fillVectors(vec, array); - break; - case TypeIndex::Int32: - fillVectors(vec, array); - break; - case TypeIndex::Int64: - fillVectors(vec, array); - break; - case TypeIndex::Float32: - fillVectors(vec, array); - break; - case TypeIndex::Float64: - fillVectors(vec, array); + #define ON_TYPE(type) \ + case TypeIndex::type: \ + return executeWithTypes(array, input_rows_count); \ break; + + SUPPORTED_TYPES(ON_TYPE) + #undef ON_TYPE + default: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, @@ -159,46 +173,35 @@ private: } } - template - requires std::is_same_v - void fillVectors(std::vector> & vec, const ColumnArray & array) const + 
template + static ColumnPtr executeWithTypes(const ColumnArray & array, size_t input_rows_count) { - const auto & data = typeid_cast &>(array.getData()).getData(); + const auto & data = typeid_cast &>(array.getData()).getData(); const auto & offsets = array.getOffsets(); - vec.reserve(offsets.size()); - ColumnArray::Offset prev = 0; - for (auto off : offsets) - { - vec.emplace_back(Eigen::Map>(data.data() + prev, off - prev)); - prev = off; - } - } - template - void fillVectors(std::vector> & vec, const ColumnArray & array) const - { - const auto & data = typeid_cast &>(array.getData()).getData(); - const auto & offsets = array.getOffsets(); - vec.reserve(offsets.size()); + auto result_col = ColumnVector::create(input_rows_count); + auto & result_data = result_col->getData(); ColumnArray::Offset prev = 0; + size_t row = 0; for (auto off : offsets) { - Eigen::VectorX mat(off - prev); - for (ColumnArray::Offset row = 0; row + prev < off; ++row) + Float64 result = 0; + for (; prev < off; ++prev) { - mat[row] = static_cast(data[prev + row]); + result = Kernel::accumulate(result, data[prev]); } - prev = off; - vec.emplace_back(mat); + result_data[row] = Kernel::finalize(result); + row++; } + return result_col; } }; void registerFunctionArrayNorm(FunctionFactory & factory) { - factory.registerFunction>>(); - factory.registerFunction>>(); + factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); } diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference index 158df656403..2fd6c66c817 100644 --- a/tests/queries/0_stateless/02282_array_distance.reference +++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -1,7 +1,7 @@ 6 3.7416575 3 -0.0025851727 +0.002585097 \N nan 12 @@ -13,14 +13,14 @@ nan 2 5 4 -0.16847819 +0.16847816 0.35846698 -0.07417989 +0.0741799 6 8 9 0.020204102886728692 -0.11808289631180302 +0.11808289631180313 0 1 1 218.74642854227358 1 2 
1348.2117786164013 From caad1435d53459020abb552109e42f0fa34c352a Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 20 May 2022 20:49:09 +0200 Subject: [PATCH 457/615] Optimized the case when one the argumnets is Const --- src/Functions/array/arrayDistance.cpp | 71 ++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 0e45a5f61c1..2ef1cab4647 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "base/range.h" namespace DB { @@ -224,7 +225,7 @@ private: { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithTypes(arguments, input_rows_count); \ + return executeWithTypes(arguments[0].column, arguments[1].column, input_rows_count); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -240,13 +241,19 @@ private: } template - ColumnPtr executeWithTypes(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const { - DataTypePtr type_x = typeid_cast(arguments[0].type.get())->getNestedType(); - DataTypePtr type_y = typeid_cast(arguments[1].type.get())->getNestedType(); + if (typeid_cast(col_x.get())) + { + return executeWithTypesFirstArgConst(col_x, col_y, input_rows_count); + } + else if (typeid_cast(col_y.get())) + { + return executeWithTypesFirstArgConst(col_y, col_x, input_rows_count); + } - ColumnPtr col_x = arguments[0].column->convertToFullColumnIfConst(); - ColumnPtr col_y = arguments[1].column->convertToFullColumnIfConst(); + col_x = col_x->convertToFullColumnIfConst(); + col_y = col_y->convertToFullColumnIfConst(); const auto & array_x = *assert_cast(col_x.get()); const auto & array_y = *assert_cast(col_y.get()); @@ -257,7 +264,7 @@ private: const auto & offsets_x = array_x.getOffsets(); const 
auto & offsets_y = array_y.getOffsets(); - /// Check that all arrays in both columns are the sames size + /// Check that arrays in both columns are the sames size for (size_t row = 0; row < offsets_x.size(); ++row) { if (unlikely(offsets_x[row] != offsets_y[row])) @@ -288,6 +295,56 @@ private: } return result; } + + /// Special case when the 1st parameter is Const + template + ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const + { + col_x = assert_cast(col_x.get())->getDataColumnPtr(); + col_y = col_y->convertToFullColumnIfConst(); + + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); + + const auto & data_x = typeid_cast &>(array_x.getData()).getData(); + const auto & data_y = typeid_cast &>(array_y.getData()).getData(); + + const auto & offsets_x = array_x.getOffsets(); + const auto & offsets_y = array_y.getOffsets(); + + /// Check that arrays in both columns are the sames size + ColumnArray::Offset prev_offset = 0; + for (size_t row : collections::range(0, offsets_y.size())) + { + if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) + { + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "Arguments of function {} have different array sizes: {} and {}", + getName(), offsets_x[0], offsets_y[row] - prev_offset); + } + prev_offset = offsets_y[row]; + } + + auto result = ColumnVector::create(input_rows_count); + auto & result_data = result->getData(); + + /// Do the actual computation + ColumnArray::Offset prev = 0; + size_t row = 0; + for (auto off : offsets_y) + { + typename Kernel::template State state; + for (size_t i = 0; prev < off; ++i, ++prev) + { + Kernel::accumulate(state, data_x[i], data_y[prev]); + } + result_data[row] = Kernel::finalize(state); + row++; + } + return result; + } + }; void registerFunctionArrayDistance(FunctionFactory & factory) From 7d0ed7e51a364e6932ad93ab2d0458a3818203a4 Mon Sep 17 00:00:00 2001 From: Alexander 
Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 19:37:01 +0200 Subject: [PATCH 458/615] Remove eigen library --- .gitmodules | 4 ---- contrib/CMakeLists.txt | 1 - contrib/eigen | 1 - contrib/eigen-cmake/CMakeLists.txt | 16 ---------------- docker/test/fasttest/run.sh | 1 - src/Functions/array/CMakeLists.txt | 2 +- 6 files changed, 1 insertion(+), 24 deletions(-) delete mode 160000 contrib/eigen delete mode 100644 contrib/eigen-cmake/CMakeLists.txt diff --git a/.gitmodules b/.gitmodules index 8b30973951f..55fd684fddb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -265,10 +265,6 @@ [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash.git -[submodule "contrib/eigen"] - path = contrib/eigen - url = https://github.com/eigen-mirror/eigen [submodule "contrib/hashidsxx"] path = contrib/hashidsxx url = https://github.com/schoentoon/hashidsxx.git - diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index a7f1a908474..943e0e0ebc1 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -153,7 +153,6 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) -add_contrib (eigen-cmake eigen) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. 
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear diff --git a/contrib/eigen b/contrib/eigen deleted file mode 160000 index 3147391d946..00000000000 --- a/contrib/eigen +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3147391d946bb4b6c68edd901f2add6ac1f31f8c diff --git a/contrib/eigen-cmake/CMakeLists.txt b/contrib/eigen-cmake/CMakeLists.txt deleted file mode 100644 index a37d341109c..00000000000 --- a/contrib/eigen-cmake/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -set(EIGEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/eigen") - -add_library (_eigen INTERFACE) - -# Only include MPL2 code from Eigen library -target_compile_definitions(_eigen INTERFACE EIGEN_MPL2_ONLY) - -# Clang by default mimics gcc 4.2.1 compatibility but Eigen checks __GNUC__ version to enable -# a workaround for bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 fixed in 6.3 -# So we fake gcc > 6.3 when building with clang -if (COMPILER_CLANG AND ARCH_PPC64LE) - target_compile_options(_eigen INTERFACE -fgnuc-version=6.4) -endif() - -target_include_directories (_eigen SYSTEM INTERFACE ${EIGEN_LIBRARY_DIR}) -add_library(ch_contrib::eigen ALIAS _eigen) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 3a660d9cf15..cafc62b365e 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -177,7 +177,6 @@ function clone_submodules contrib/jemalloc contrib/replxx contrib/wyhash - contrib/eigen contrib/hashidsxx ) diff --git a/src/Functions/array/CMakeLists.txt b/src/Functions/array/CMakeLists.txt index c98f4430078..9762674d6e9 100644 --- a/src/Functions/array/CMakeLists.txt +++ b/src/Functions/array/CMakeLists.txt @@ -1,7 +1,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions_array .) 
add_library(clickhouse_functions_array ${clickhouse_functions_array_sources} ${clickhouse_functions_array_headers}) -target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils ch_contrib::eigen) +target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils) if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) target_compile_options(clickhouse_functions_array PRIVATE "-g0") From e33cfc889cdb5371749afb8b07db470946781fec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 24 May 2022 03:47:17 -0500 Subject: [PATCH 459/615] Fix bug of datetime64 parsed from string '1969-12-31 23:59:59.123' (#37039) --- src/Core/DecimalFunctions.h | 4 +-- src/Core/tests/gtest_DecimalFunctions.cpp | 30 ++++++++++++++++++- src/IO/ReadHelpers.h | 21 +++++++++++-- src/IO/WriteHelpers.h | 21 +++++++++++-- .../02242_negative_datetime64.reference | 1 + .../0_stateless/02242_negative_datetime64.sql | 1 + 6 files changed, 70 insertions(+), 8 deletions(-) diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index f08527ee4d5..331df9aa637 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -156,7 +156,7 @@ inline DecimalComponents splitWithScaleMultiplier( using T = typename DecimalType::NativeType; const auto whole = decimal.value / scale_multiplier; auto fractional = decimal.value % scale_multiplier; - if (fractional < T(0)) + if (whole && fractional < T(0)) fractional *= T(-1); return {whole, fractional}; @@ -199,7 +199,7 @@ inline typename DecimalType::NativeType getFractionalPartWithScaleMultiplier( /// Anycase we make modulo before compare to make scale_multiplier > 1 unaffected. 
T result = decimal.value % scale_multiplier; if constexpr (!keep_sign) - if (result < T(0)) + if (decimal.value / scale_multiplier && result < T(0)) result = -result; return result; diff --git a/src/Core/tests/gtest_DecimalFunctions.cpp b/src/Core/tests/gtest_DecimalFunctions.cpp index 7517edda937..1712785488e 100644 --- a/src/Core/tests/gtest_DecimalFunctions.cpp +++ b/src/Core/tests/gtest_DecimalFunctions.cpp @@ -176,7 +176,7 @@ INSTANTIATE_TEST_SUITE_P(Basic, } }, { - "When scale is not 0 and whole part is 0.", + "For positive Decimal value, with scale not 0, and whole part is 0.", 123, 3, { @@ -184,6 +184,16 @@ INSTANTIATE_TEST_SUITE_P(Basic, 123 } }, + { + "For negative Decimal value, with scale not 0, and whole part is 0.", + -123, + 3, + { + 0, + -123 + } + }, + { "For negative Decimal value whole part is negative, fractional is non-negative.", -1234567'89, @@ -216,6 +226,24 @@ INSTANTIATE_TEST_SUITE_P(Basic, 187618332, 123 } + }, + { + "Negative timestamp 1969-12-31 23:59:59.123 UTC", + DateTime64(-877), + 3, + { + 0, + -877 + } + }, + { + "Positive timestamp 1970-01-01 00:00:00.123 UTC", + DateTime64(123), + 3, + { + 0, + 123 + } } }) ); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 496b8000441..fcebedf92ec 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -931,12 +931,29 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re ++buf.position(); /// Keep sign of fractional part the same with whole part if datetime64 is negative - /// 1965-12-12 12:12:12.123 => whole = -127914468, fraction = 123(sign>0) -> new whole = -127914467, new fraction = 877(sign<0) + /// Case1: + /// 1965-12-12 12:12:12.123 + /// => whole = -127914468, fractional = 123(coefficient>0) + /// => new whole = -127914467, new fractional = 877(coefficient<0) + /// + /// Case2: + /// 1969-12-31 23:59:59.123 + /// => whole = -1, fractional = 123(coefficient>0) + /// => new whole = 0, new fractional = -877(coefficient>0) if 
(components.whole < 0 && components.fractional != 0) { const auto scale_multiplier = DecimalUtils::scaleMultiplier(scale); ++components.whole; - components.fractional = scale_multiplier - components.fractional; + if (components.whole) + { + /// whole keep the sign, fractional should be non-negative + components.fractional = scale_multiplier - components.fractional; + } + else + { + /// when whole is zero, fractional should keep the sign + components.fractional = components.fractional - scale_multiplier; + } } } /// 9908870400 is time_t value for 2184-01-01 UTC (a bit over the last year supported by DateTime64) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 8547a0af1cd..5eab75f14b1 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -805,11 +805,21 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & scale = scale > MaxScale ? MaxScale : scale; auto components = DecimalUtils::split(datetime64, scale); - /// -127914467.877 => whole = -127914467, fraction = 877 => new whole = -127914468(1965-12-12 12:12:12), new fraction = 123(.123) => 1965-12-12 12:12:12.123 - if (components.whole < 0 && components.fractional != 0) + /// Case1: + /// -127914467.877 + /// => whole = -127914467, fraction = 877(After DecimalUtils::split) + /// => new whole = -127914468(1965-12-12 12:12:12), new fraction = 1000 - 877 = 123(.123) + /// => 1965-12-12 12:12:12.123 + /// + /// Case2: + /// -0.877 + /// => whole = 0, fractional = -877(After DecimalUtils::split) + /// => whole = -1(1969-12-31 23:59:59), fractional = 1000 + (-877) = 123(.123) + using T = typename DateTime64::NativeType; + if (datetime64.value < 0 && components.fractional) { + components.fractional = DecimalUtils::scaleMultiplier(scale) + (components.whole ? 
T(-1) : T(1)) * components.fractional; --components.whole; - components.fractional = DecimalUtils::scaleMultiplier(scale) - components.fractional; } writeDateTimeText(LocalDateTime(components.whole, time_zone), buf); @@ -989,7 +999,12 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer { part = DecimalUtils::getFractionalPart(x, scale); if (part || trailing_zeros) + { + if (part < 0) + part *= T(-1); + writeDecimalFractional(part, scale, ostr, trailing_zeros); + } } } diff --git a/tests/queries/0_stateless/02242_negative_datetime64.reference b/tests/queries/0_stateless/02242_negative_datetime64.reference index 7f14679ac56..fbbebb520ae 100644 --- a/tests/queries/0_stateless/02242_negative_datetime64.reference +++ b/tests/queries/0_stateless/02242_negative_datetime64.reference @@ -1,2 +1,3 @@ -127914467.877 187618332.123 +1969-12-31 23:59:59.123 diff --git a/tests/queries/0_stateless/02242_negative_datetime64.sql b/tests/queries/0_stateless/02242_negative_datetime64.sql index 32086188608..40679841943 100644 --- a/tests/queries/0_stateless/02242_negative_datetime64.sql +++ b/tests/queries/0_stateless/02242_negative_datetime64.sql @@ -1,2 +1,3 @@ SELECT cast(toDateTime64('1965-12-12 12:12:12.123', 3, 'UTC') as Decimal64(3)); SELECT cast(toDateTime64('1975-12-12 12:12:12.123', 3, 'UTC') as Decimal64(3)); +SELECT toDateTime64('1969-12-31 23:59:59.123', 3, 'UTC'); From 501a8158a5a45876e143240c47ae6aec833251cc Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 23 May 2022 13:10:08 +0200 Subject: [PATCH 460/615] Don't fail docker images on dispatch or other events --- tests/ci/docker_images_check.py | 6 +++++- tests/ci/pr_info.py | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 57227ef307e..c1d1c1df1f1 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -404,7 +404,11 @@ def main(): elif args.image_path: pr_info.changed_files = set(i for i in args.image_path) else: - pr_info.fetch_changed_files() + try: + pr_info.fetch_changed_files() + except TypeError: + # If the event does not contain diff, nothing will be built + pass changed_images = get_changed_docker_images(pr_info, images_dict) if changed_images: diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 9d287d2a07e..d17e0f1f379 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -186,6 +186,7 @@ class PRInfo: else: self.diff_url = pull_request["diff_url"] else: + print("event.json does not match pull_request or push:") print(json.dumps(github_event, sort_keys=True, indent=4)) self.sha = os.getenv("GITHUB_SHA") self.number = 0 @@ -204,8 +205,8 @@ class PRInfo: self.fetch_changed_files() def fetch_changed_files(self): - if not self.diff_url: - raise Exception("Diff URL cannot be find for event") + if not getattr(self, "diff_url", False): + raise TypeError("The event does not have diff URL") response = get_with_retries( self.diff_url, From 3ab5390327b86dfc7558e3875d6ae50ccf411bbf Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 23 May 2022 13:26:10 +0200 Subject: [PATCH 461/615] Trigger rebuild for docs-release docker image --- docker/docs/release/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/docs/release/Dockerfile b/docker/docs/release/Dockerfile index 024cf8e6cc6..89536889746 100644 --- a/docker/docs/release/Dockerfile +++ b/docker/docs/release/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #33610 # docker build -t clickhouse/docs-release . FROM ubuntu:20.04 From 21d6fc54d5e3345b770d0e56f4ba09d2d252a3ed Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 23 May 2022 13:45:47 +0200 Subject: [PATCH 462/615] Do not check docs_release.py for changed files --- tests/ci/docs_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index d404e79c312..71a07ab5ba9 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -25,7 +25,7 @@ if __name__ == "__main__": repo_path = REPO_COPY gh = Github(get_best_robot_token()) - pr_info = PRInfo(need_changed_files=True) + pr_info = PRInfo() rerun_helper = RerunHelper(gh, pr_info, NAME) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") From 249af9c69a47b3e2fe9cfdf4252cc13aaf5d765d Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 23 May 2022 14:01:59 +0200 Subject: [PATCH 463/615] Run container as user, print stdout --- tests/ci/docs_release.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index 71a07ab5ba9..3e8906b765f 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import argparse import logging import subprocess import os @@ -15,11 +16,25 @@ from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import get_commit from rerun_helper import RerunHelper +from tee_popen import TeePopen NAME = "Docs Release (actions)" + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="ClickHouse building script using prebuilt Docker image", + ) + parser.add_argument( + "--as-root", action="store_true", help="if the container should run as root" + ) + return parser.parse_args() + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) + args = parse_args() temp_path = TEMP_PATH repo_path = REPO_COPY @@ -41,19 +56,21 @@ if __name__ == "__main__": os.makedirs(test_output) token = CLOUDFLARE_TOKEN + if args.as_root: + user = "0:0" + else: + user = f"{os.geteuid()}:{os.getegid()}" cmd = ( "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent " - f"-e SSH_AUTH_SOCK=/ssh-agent -e CLOUDFLARE_TOKEN={token} " + f"--user={user} -e SSH_AUTH_SOCK=/ssh-agent -e CLOUDFLARE_TOKEN={token} " f"-e EXTRA_BUILD_ARGS='--verbose' --volume={repo_path}:/repo_path" f" --volume={test_output}:/output_path {docker_image}" ) run_log_path = os.path.join(test_output, "runlog.log") - with open(run_log_path, "w", encoding="utf-8") as log, SSHKey( - "ROBOT_CLICKHOUSE_SSH_KEY" - ): - with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process: + with 
SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): + with TeePopen(cmd, run_log_path) as process: retcode = process.wait() if retcode == 0: logging.info("Run successfully") From 3af9a699d5f0e2fdd2af7551c68f2007c97c6599 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 24 May 2022 10:42:34 +0200 Subject: [PATCH 464/615] Fix ssh-agent socket in docker image --- tests/ci/docs_release.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index 3e8906b765f..ba1c99e9b3f 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -55,21 +55,22 @@ if __name__ == "__main__": if not os.path.exists(test_output): os.makedirs(test_output) - token = CLOUDFLARE_TOKEN if args.as_root: user = "0:0" else: user = f"{os.geteuid()}:{os.getegid()}" - cmd = ( - "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent " - f"--user={user} -e SSH_AUTH_SOCK=/ssh-agent -e CLOUDFLARE_TOKEN={token} " - f"-e EXTRA_BUILD_ARGS='--verbose' --volume={repo_path}:/repo_path" - f" --volume={test_output}:/output_path {docker_image}" - ) run_log_path = os.path.join(test_output, "runlog.log") with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): + cmd = ( + f"docker run --cap-add=SYS_PTRACE --user={user} " + f"--volume='{os.getenv('SSH_AUTH_SOCK', '')}:/ssh-agent' " + f"--volume={repo_path}:/repo_path --volume={test_output}:/output_path " + f"-e SSH_AUTH_SOCK=/ssh-agent -e EXTRA_BUILD_ARGS='--verbose' " + f"-e CLOUDFLARE_TOKEN={CLOUDFLARE_TOKEN} {docker_image}" + ) + logging.info("Running command: %s", cmd) with TeePopen(cmd, run_log_path) as process: retcode = process.wait() if retcode == 0: From a83864cdcbc0e12bd14673e86e53e9a272cd150b Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 24 May 2022 11:11:29 +0200 Subject: [PATCH 465/615] Fix shellcheck issues in docs/tools --- docs/tools/deploy-to-test.sh | 9 ++++----- docs/tools/release.sh | 12 ++++++------ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/tools/deploy-to-test.sh b/docs/tools/deploy-to-test.sh index 30771052535..a7a922137d5 100755 --- a/docs/tools/deploy-to-test.sh +++ b/docs/tools/deploy-to-test.sh @@ -12,12 +12,11 @@ # set -ex -BASE_DIR=$(dirname $(readlink -f $0)) +BASE_DIR=$(dirname "$(readlink -f "$0")") GIT_USER=${GIT_USER:-$USER} -GIT_TEST_URI=git@github.com:${GIT_USER}/clickhouse.github.io.git \ +GIT_PROD_URI=git@github.com:${GIT_USER}/clickhouse.github.io.git \ BASE_DOMAIN=${GIT_USER}-test.clickhouse.com \ - EXTRA_BUILD_ARGS="${@}" \ + EXTRA_BUILD_ARGS="${*}" \ CLOUDFLARE_TOKEN="" \ - HISTORY_SIZE=3 \ - ${BASE_DIR}/release.sh + "${BASE_DIR}/release.sh" diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 3482a0fbcc1..b55841f9da2 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -1,24 +1,24 @@ #!/usr/bin/env bash set -ex -BASE_DIR=$(dirname $(readlink -f $0)) +BASE_DIR=$(dirname "$(readlink -f "$0")") BUILD_DIR="${BASE_DIR}/../build" PUBLISH_DIR="${BASE_DIR}/../publish" BASE_DOMAIN="${BASE_DOMAIN:-content.clickhouse.com}" -GIT_TEST_URI="${GIT_TEST_URI:-git@github.com:ClickHouse/clickhouse-com-content.git}" -GIT_PROD_URI="git@github.com:ClickHouse/clickhouse-website-content.git" +GIT_PROD_URI="${GIT_PROD_URI:-git@github.com:ClickHouse/clickhouse-com-content.git}" EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---verbose}" if [[ -z "$1" ]] then source "${BASE_DIR}/venv/bin/activate" + # shellcheck disable=2086 python3 "${BASE_DIR}/build.py" ${EXTRA_BUILD_ARGS} rm -rf "${PUBLISH_DIR}" mkdir "${PUBLISH_DIR}" && cd "${PUBLISH_DIR}" # Will make a repository with website content as the only commit. 
git init - git remote add origin "${GIT_TEST_URI}" + git remote add origin "${GIT_PROD_URI}" git config user.email "robot-clickhouse@clickhouse.com" git config user.name "robot-clickhouse" @@ -28,7 +28,7 @@ then echo -n "" > README.md echo -n "" > ".nojekyll" cp "${BASE_DIR}/../../LICENSE" . - git add * + git add ./* git add ".nojekyll" git commit --quiet -m "Add new release at $(date)" @@ -40,7 +40,7 @@ then # Turn off logging. set +x - if [[ ! -z "${CLOUDFLARE_TOKEN}" ]] + if [[ -n "${CLOUDFLARE_TOKEN}" ]] then sleep 1m # https://api.cloudflare.com/#zone-purge-files-by-cache-tags,-host-or-prefix From 90425dedd15d2db00b3b39e0cef422f5912f66c7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 24 May 2022 11:13:58 +0200 Subject: [PATCH 466/615] Fail release on status == failure --- tests/ci/docs_release.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index ba1c99e9b3f..806db28c1b1 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -116,3 +116,6 @@ if __name__ == "__main__": commit.create_status( context=NAME, description=description, state=status, target_url=report_url ) + + if status == "failure": + sys.exit(1) From 164f8227f92fbdd0d0ae1f2fb05d24e628d9d29b Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 24 May 2022 12:38:11 +0200 Subject: [PATCH 467/615] Fix memory sanitizer --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 8988a456f52..78c0fa0ae3c 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -259,8 +259,7 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - if (outcome.GetError().GetErrorType() != 
Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - throwIfError(outcome); + throwIfError(outcome); } } @@ -277,7 +276,7 @@ void S3ObjectStorage::removeObjectIfExists(const std::string & path) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - if (outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + if (!outcome.IsSuccess() && outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) throwIfError(outcome); } @@ -314,7 +313,8 @@ void S3ObjectStorage::removeObjectsIfExist(const std::vector & path request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); + if (!outcome.IsSuccess() && outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) + throwIfError(outcome); } } From b4c0f7a6216addf14cedeec7b0cffaf8d91f0ba8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 24 May 2022 12:56:57 +0200 Subject: [PATCH 468/615] Simplify labels check --- tests/ci/run_check.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index bd70134760a..87139c5bb8a 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -253,16 +253,7 @@ if __name__ == "__main__": ) sys.exit(1) else: - if "pr-documentation" in pr_info.labels or "pr-doc-fix" in pr_info.labels: - commit.create_status( - context=NAME, - description="Skipping checks for documentation", - state="success", - target_url=url, - ) - print("::notice ::Can run, but it's documentation PR, skipping") - else: - print("::notice ::Can run") - commit.create_status( - context=NAME, description=description, state="pending", target_url=url - ) + print("::notice ::Can run") + commit.create_status( + context=NAME, description=description, state="pending", target_url=url + ) From 093d3157563d5f7d14ab2f0c6bb71db0324b90ef 
Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 19 May 2022 19:18:58 +0800 Subject: [PATCH 469/615] partition pruning for s3 --- src/Functions/IFunction.cpp | 8 +- src/Storages/StorageS3.cpp | 185 +++++++++++++++--- src/Storages/StorageS3.h | 34 ++-- src/Storages/StorageS3Cluster.cpp | 22 ++- src/Storages/StorageS3Cluster.h | 2 + .../02302_s3_file_pruning.reference | 27 +++ .../0_stateless/02302_s3_file_pruning.sql | 35 ++++ 7 files changed, 264 insertions(+), 49 deletions(-) create mode 100644 tests/queries/0_stateless/02302_s3_file_pruning.reference create mode 100644 tests/queries/0_stateless/02302_s3_file_pruning.sql diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 19638c78daf..da5864066e4 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -250,7 +250,10 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType : columns_without_low_cardinality.front().column->size(); auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run); - auto keys = res->convertToFullColumnIfConst(); + bool res_is_constant = isColumnConst(*res); + auto keys = res_is_constant + ? 
res->cloneResized(std::min(static_cast(1), input_rows_count))->convertToFullColumnIfConst() + : res; auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType()); ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size()); @@ -260,6 +263,9 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType result = ColumnLowCardinality::create(res_dictionary, res_indexes->index(*indexes, 0)); else result = ColumnLowCardinality::create(res_dictionary, res_indexes); + + if (res_is_constant) + result = ColumnConst::create(std::move(result), input_rows_count); } else { diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d402dce5ede..8e7b2e9d497 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -75,17 +76,18 @@ namespace ErrorCodes extern const int UNEXPECTED_EXPRESSION; extern const int DATABASE_ACCESS_DENIED; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int NOT_IMPLEMENTED; } class IOutputFormat; using OutputFormatPtr = std::shared_ptr; -class StorageS3Source::DisclosedGlobIterator::Impl +class StorageS3Source::DisclosedGlobIterator::Impl : WithContext { public: - Impl(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) - : client(client_), globbed_uri(globbed_uri_) + Impl(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr & query_, const Block & virtual_header_, ContextPtr context_) + : WithContext(context_), client(client_), globbed_uri(globbed_uri_), query(query_), virtual_header(virtual_header_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -101,6 +103,20 @@ public: return; } + /// Create a virtual block with one row to construct filter + if (query && virtual_header) 
+ { + /// Append "key" column as the filter result + virtual_header.insert({ColumnString::create(), std::make_shared(), "_key"}); + + auto block = virtual_header.cloneEmpty(); + MutableColumns columns = block.mutateColumns(); + for (auto & column : columns) + column->insertDefault(); + block.setColumns(std::move(columns)); + VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); + } + request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); @@ -144,13 +160,52 @@ private: const auto & result_batch = outcome.GetResult().GetContents(); - buffer.reserve(result_batch.size()); - for (const auto & row : result_batch) + if (filter_ast) { - String key = row.GetKey(); - if (re2::RE2::FullMatch(key, *matcher)) - buffer.emplace_back(std::move(key)); + auto block = virtual_header.cloneEmpty(); + MutableColumnPtr path_column; + MutableColumnPtr file_column; + MutableColumnPtr key_column = block.getByName("_key").column->assumeMutable(); + + if (block.has("_path")) + path_column = block.getByName("_path").column->assumeMutable(); + + if (block.has("_file")) + file_column = block.getByName("_file").column->assumeMutable(); + + for (const auto & row : result_batch) + { + const String & key = row.GetKey(); + if (re2::RE2::FullMatch(key, *matcher)) + { + String path = fs::path(globbed_uri.bucket) / key; + String file = path.substr(path.find_last_of('/') + 1); + if (path_column) + path_column->insert(path); + if (file_column) + file_column->insert(file); + key_column->insert(key); + } + } + + VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); + const ColumnString & keys = typeid_cast(*block.getByName("_key").column); + size_t rows = block.rows(); + buffer.reserve(rows); + for (size_t i = 0; i < rows; ++i) + buffer.emplace_back(keys.getDataAt(i).toString()); } + else + { + buffer.reserve(result_batch.size()); + for (const auto & row : 
result_batch) + { + String key = row.GetKey(); + if (re2::RE2::FullMatch(key, *matcher)) + buffer.emplace_back(std::move(key)); + } + } + /// Set iterator only after the whole batch is processed buffer_iter = buffer.begin(); @@ -165,25 +220,83 @@ private: Strings::iterator buffer_iter; Aws::S3::S3Client client; S3::URI globbed_uri; + ASTPtr query; + Block virtual_header; + ASTPtr filter_ast; Aws::S3::Model::ListObjectsV2Request request; Aws::S3::Model::ListObjectsV2Outcome outcome; std::unique_ptr matcher; bool is_finished{false}; }; -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) - : pimpl(std::make_shared(client_, globbed_uri_)) {} +StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( + Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr query, const Block & virtual_header, ContextPtr context) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context)) +{ +} String StorageS3Source::DisclosedGlobIterator::next() { return pimpl->next(); } -class StorageS3Source::KeysIterator::Impl +class StorageS3Source::KeysIterator::Impl : WithContext { public: - explicit Impl(const std::vector & keys_) : keys(keys_) + explicit Impl( + const std::vector & keys_, const String & bucket_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_) + : WithContext(context_), keys(keys_), bucket(bucket_), query(query_), virtual_header(virtual_header_) { + /// Create a virtual block with one row to construct filter + if (query && virtual_header) + { + /// Append "key" column as the filter result + virtual_header.insert({ColumnString::create(), std::make_shared(), "_key"}); + + auto block = virtual_header.cloneEmpty(); + MutableColumns columns = block.mutateColumns(); + for (auto & column : columns) + column->insertDefault(); + block.setColumns(std::move(columns)); + + ASTPtr filter_ast; + VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), 
block, filter_ast); + + if (filter_ast) + { + block = virtual_header.cloneEmpty(); + MutableColumnPtr path_column; + MutableColumnPtr file_column; + MutableColumnPtr key_column = block.getByName("_key").column->assumeMutable(); + + if (block.has("_path")) + path_column = block.getByName("_path").column->assumeMutable(); + + if (block.has("_file")) + file_column = block.getByName("_file").column->assumeMutable(); + + for (const auto & key : keys) + { + String path = fs::path(bucket) / key; + String file = path.substr(path.find_last_of('/') + 1); + if (path_column) + path_column->insert(path); + if (file_column) + file_column->insert(file); + key_column->insert(key); + } + + VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); + const ColumnString & keys_col = typeid_cast(*block.getByName("_key").column); + size_t rows = block.rows(); + Strings filtered_keys; + filtered_keys.reserve(rows); + for (size_t i = 0; i < rows; ++i) + filtered_keys.emplace_back(keys_col.getDataAt(i).toString()); + + keys = std::move(filtered_keys); + } + } } String next() @@ -197,9 +310,15 @@ public: private: Strings keys; std::atomic_size_t index = 0; + + String bucket; + ASTPtr query; + Block virtual_header; }; -StorageS3Source::KeysIterator::KeysIterator(const std::vector & keys_) : pimpl(std::make_shared(keys_)) +StorageS3Source::KeysIterator::KeysIterator( + const std::vector & keys_, const String & bucket_, ASTPtr query, const Block & virtual_header, ContextPtr context) + : pimpl(std::make_shared(keys_, bucket_, query, virtual_header, context)) { } @@ -639,6 +758,8 @@ StorageS3::StorageS3( auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); virtual_columns = getVirtualsForStorage(columns, default_virtuals); + for (const auto & column : virtual_columns) + virtual_block.insert({column.type->createColumn(), column.type, column.name}); } std::shared_ptr StorageS3::createFileIterator( @@ -647,6 +768,8 @@ std::shared_ptr 
StorageS3::createFileIterator( bool is_key_with_globs, bool distributed_processing, ContextPtr local_context, + ASTPtr query, + const Block & virtual_block, const std::vector & read_tasks) { if (distributed_processing) @@ -660,19 +783,15 @@ std::shared_ptr StorageS3::createFileIterator( else if (is_key_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(*s3_configuration.client, s3_configuration.uri); - return std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); + auto glob_iterator = std::make_shared( + *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context); + return std::make_shared([glob_iterator]() { return glob_iterator->next(); }); } else { - auto keys_iterator = std::make_shared(keys); - return std::make_shared([keys_iterator]() - { - return keys_iterator->next(); - }); + auto keys_iterator + = std::make_shared(keys, s3_configuration.uri.bucket, query, virtual_block, local_context); + return std::make_shared([keys_iterator]() { return keys_iterator->next(); }); } } @@ -684,12 +803,17 @@ bool StorageS3::isColumnOriented() const Pipe StorageS3::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { + bool has_wildcards = s3_configuration.uri.bucket.find(PARTITION_ID_WILDCARD) != String::npos + || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; + if (partition_by && has_wildcards) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); + updateS3Configuration(local_context, s3_configuration); Pipes pipes; @@ -703,7 +827,15 @@ Pipe StorageS3::read( requested_virtual_columns.push_back(virtual_column); } - std::shared_ptr iterator_wrapper = 
createFileIterator(s3_configuration, keys, is_key_with_globs, distributed_processing, local_context, read_tasks_used_in_schema_inference); + std::shared_ptr iterator_wrapper = createFileIterator( + s3_configuration, + keys, + is_key_with_globs, + distributed_processing, + local_context, + query_info.query, + virtual_block, + read_tasks_used_in_schema_inference); ColumnsDescription columns_description; Block block_for_format; @@ -999,7 +1131,8 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( ContextPtr ctx, std::vector * read_keys_in_distributed_processing) { - auto file_iterator = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx); + auto file_iterator + = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}); ReadBufferIterator read_buffer_iterator = [&, first = false]() mutable -> std::unique_ptr { diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index cac5b3c270f..00752324d76 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -34,25 +34,28 @@ class StorageS3Source : public SourceWithProgress, WithContext public: class DisclosedGlobIterator { - public: - DisclosedGlobIterator(Aws::S3::S3Client &, const S3::URI &); - String next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; + public: + DisclosedGlobIterator( + Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr query, const Block & virtual_header, ContextPtr context); + String next(); + + private: + class Impl; + /// shared_ptr to have copy constructor + std::shared_ptr pimpl; }; class KeysIterator { - public: - explicit KeysIterator(const std::vector & keys_); - String next(); + public: + explicit KeysIterator( + const std::vector & keys_, const String & bucket_, ASTPtr query, const Block & virtual_header, ContextPtr context); + String next(); - private: - class Impl; 
- /// shared_ptr to have copy constructor - std::shared_ptr pimpl; + private: + class Impl; + /// shared_ptr to have copy constructor + std::shared_ptr pimpl; }; class ReadTasksIterator @@ -203,6 +206,7 @@ private: S3Configuration s3_configuration; std::vector keys; NamesAndTypesList virtual_columns; + Block virtual_block; String format_name; String compression_method; @@ -222,6 +226,8 @@ private: bool is_key_with_globs, bool distributed_processing, ContextPtr local_context, + ASTPtr query, + const Block & virtual_block, const std::vector & read_tasks = {}); static ColumnsDescription getTableStructureFromDataImpl( diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index c0631311d8c..13be199bd37 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -82,6 +83,15 @@ StorageS3Cluster::StorageS3Cluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); + for (const auto & column : virtual_columns) + virtual_block.insert({column.type->createColumn(), column.type, column.name}); } /// The code executes on initiator @@ -98,11 +108,9 @@ Pipe StorageS3Cluster::read( auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); - auto iterator = std::make_shared(*s3_configuration.client, s3_configuration.uri); - auto callback = std::make_shared([iterator]() mutable -> String - { - return iterator->next(); - }); + auto iterator = std::make_shared( + *s3_configuration.client, s3_configuration.uri, query_info.query, virtual_block, context); + auto callback = 
std::make_shared([iterator]() mutable -> String { return iterator->next(); }); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) Block header = @@ -164,9 +172,7 @@ QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage( NamesAndTypesList StorageS3Cluster::getVirtuals() const { - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; + return virtual_columns; } diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 5470dda3e97..f823d1fdf04 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -49,6 +49,8 @@ private: String cluster_name; String format_name; String compression_method; + NamesAndTypesList virtual_columns; + Block virtual_block; }; diff --git a/tests/queries/0_stateless/02302_s3_file_pruning.reference b/tests/queries/0_stateless/02302_s3_file_pruning.reference new file mode 100644 index 00000000000..f8d2bdd0612 --- /dev/null +++ b/tests/queries/0_stateless/02302_s3_file_pruning.reference @@ -0,0 +1,27 @@ +-- { echo } +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302_{_partition_id}', format=Parquet) partition by a; +insert into test_02302 select number from numbers(10) settings s3_truncate_on_insert=1; +select * from test_02302; -- { serverError 48 } +drop table test_02302; +set max_rows_to_read = 1; +-- Test s3 table function with glob +select * from s3(s3_conn, filename='test_02302_*', format=Parquet) where _file like '%5'; +5 +-- Test s3 table with explicit keys (no glob) +-- TODO support truncate table function +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302.2', format=Parquet); +truncate table test_02302; +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, 
filename='test_02302.1', format=Parquet); +truncate table test_02302; +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302', format=Parquet); +truncate table test_02302; +insert into test_02302 select 0 settings s3_create_new_file_on_insert = true; +insert into test_02302 select 1 settings s3_create_new_file_on_insert = true; +insert into test_02302 select 2 settings s3_create_new_file_on_insert = true; +select * from test_02302 where _file like '%1'; +1 +drop table test_02302; diff --git a/tests/queries/0_stateless/02302_s3_file_pruning.sql b/tests/queries/0_stateless/02302_s3_file_pruning.sql new file mode 100644 index 00000000000..c6dc355bf0c --- /dev/null +++ b/tests/queries/0_stateless/02302_s3_file_pruning.sql @@ -0,0 +1,35 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +-- { echo } +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302_{_partition_id}', format=Parquet) partition by a; +insert into test_02302 select number from numbers(10) settings s3_truncate_on_insert=1; +select * from test_02302; -- { serverError 48 } +drop table test_02302; + +set max_rows_to_read = 1; + +-- Test s3 table function with glob +select * from s3(s3_conn, filename='test_02302_*', format=Parquet) where _file like '%5'; + +-- Test s3 table with explicit keys (no glob) +-- TODO support truncate table function +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302.2', format=Parquet); +truncate table test_02302; + +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302.1', format=Parquet); +truncate table test_02302; + +drop table if exists test_02302; +create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302', format=Parquet); +truncate table test_02302; + +insert into test_02302 select 0 settings 
s3_create_new_file_on_insert = true; +insert into test_02302 select 1 settings s3_create_new_file_on_insert = true; +insert into test_02302 select 2 settings s3_create_new_file_on_insert = true; + +select * from test_02302 where _file like '%1'; +drop table test_02302; From c25ef92139c4aa5b58e64e3c7fafeedae040423d Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 19 May 2022 21:17:14 +0800 Subject: [PATCH 470/615] Fix tests --- src/Functions/IFunction.cpp | 4 ++-- tests/queries/0_stateless/02000_join_on_const.reference | 1 + tests/queries/0_stateless/02000_join_on_const.sql | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index da5864066e4..255c80e0960 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -252,14 +252,14 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run); bool res_is_constant = isColumnConst(*res); auto keys = res_is_constant - ? res->cloneResized(std::min(static_cast(1), input_rows_count))->convertToFullColumnIfConst() + ? 
res->cloneResized(1)->convertToFullColumnIfConst() : res; auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType()); ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size()); ColumnUniquePtr res_dictionary = std::move(res_mut_dictionary); - if (indexes) + if (indexes && !res_is_constant) result = ColumnLowCardinality::create(res_dictionary, res_indexes->index(*indexes, 0)); else result = ColumnLowCardinality::create(res_dictionary, res_indexes); diff --git a/tests/queries/0_stateless/02000_join_on_const.reference b/tests/queries/0_stateless/02000_join_on_const.reference index b9494e4689c..3035351fd87 100644 --- a/tests/queries/0_stateless/02000_join_on_const.reference +++ b/tests/queries/0_stateless/02000_join_on_const.reference @@ -4,6 +4,7 @@ 1 1 1 +1 - ON NULL - - inner - - left - diff --git a/tests/queries/0_stateless/02000_join_on_const.sql b/tests/queries/0_stateless/02000_join_on_const.sql index f6d686cf9bc..92ded98b5f4 100644 --- a/tests/queries/0_stateless/02000_join_on_const.sql +++ b/tests/queries/0_stateless/02000_join_on_const.sql @@ -13,8 +13,8 @@ SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 JOIN t2 ON 2 = SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toNullable(1); SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toLowCardinality(1); SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toLowCardinality(toNullable(1)); +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toNullable(toLowCardinality(1)); -SELECT * FROM t1 INNER ANY JOIN t2 ON toNullable(toLowCardinality(1)); -- { serverError 403 } SELECT * FROM t1 INNER ANY JOIN t2 ON toUInt16(1); -- { serverError 403 } SELECT * FROM t1 INNER ANY JOIN t2 ON toInt8(1); -- { serverError 403 } SELECT * FROM t1 INNER ANY JOIN t2 ON 256; -- { 
serverError 403 } From 1ee02a4ac5bf2625085640f0d06eab8d0cd8fda4 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 20 May 2022 09:07:42 +0800 Subject: [PATCH 471/615] No parallel testing --- tests/queries/0_stateless/02302_s3_file_pruning.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02302_s3_file_pruning.sql b/tests/queries/0_stateless/02302_s3_file_pruning.sql index c6dc355bf0c..624a87506d1 100644 --- a/tests/queries/0_stateless/02302_s3_file_pruning.sql +++ b/tests/queries/0_stateless/02302_s3_file_pruning.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-parallel, no-fasttest -- Tag no-fasttest: Depends on AWS -- { echo } From c3f78431926172e1561a65c2f1172287c9232c0c Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 24 May 2022 12:58:27 +0200 Subject: [PATCH 472/615] Sync paths for docs and PR workflows --- .github/workflows/docs_check.yml | 2 +- .github/workflows/pull_request.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 2b02e7c23ae..0c657a245cb 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -13,9 +13,9 @@ on: # yamllint disable-line rule:truthy branches: - master paths: + - 'docker/docs/**' - 'docs/**' - 'website/**' - - 'docker/docs/**' jobs: CheckLabels: runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 6482ddebe06..01490dff59e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -13,6 +13,7 @@ on: # yamllint disable-line rule:truthy branches: - master paths-ignore: + - 'docker/docs/**' - 'docs/**' - 'website/**' ########################################################################################## From 8dced1af03cead91e76600e789e604f5050378ef Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Tue, 24 May 2022 07:29:52 -0400 
Subject: [PATCH 473/615] Update docs/en/operations/system-tables/mutations.md Co-authored-by: Antonio Andelic --- docs/en/operations/system-tables/mutations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index 6d74a0de9c3..2878a19a1e7 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -8,7 +8,7 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied. -- `mutation_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper or ZooKeeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. +- `mutation_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. - `command` ([String](../../sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). From 282f037659730eb64063c7ee711562e9aafcfeac Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 24 May 2022 15:14:44 +0300 Subject: [PATCH 474/615] Set compiler flags for ASM same as for C/CXX This should fix DWARF version for ASM sources (from #34754): $ llvm-dwarfdump -r 1 clickhouse | grep Compile -A10 | grep -A10 'version = 0x0005' ... 
-- 0x1de11022: Compile Unit: length = 0x000000e7, format = DWARF32, version = 0x0005, unit_type = DW_UT_compile, abbr_offset = 0x551dc0, addr_size = 0x08 (next unit at 0x1de1110d) 0x1de1102e: DW_TAG_compile_unit DW_AT_stmt_list (0x057e3442) DW_AT_low_pc (0x00000000101cde0c) DW_AT_high_pc (0x00000000101cde5e) DW_AT_name ("/ClickHouse/contrib/libunwind/src/UnwindRegistersSave.S") DW_AT_comp_dir ("/fasttest-workspace/build") DW_AT_producer ("Ubuntu clang version 14.0.1-++20220426083040+0e27d08cdeb3-1~exp1~20220426083051.129") DW_AT_language (DW_LANG_Mips_Assembler) ... Follow-up for: #34777 (cc @alexey-milovidov) Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e02ded4070..abe263834ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -351,6 +351,10 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${C set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_ASM_FLAGS_ADD}") + if (COMPILER_CLANG) if (OS_DARWIN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") From e6e4b2826df4576c9dcef16c1783416ad8658f89 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 24 May 2022 14:25:29 +0200 Subject: [PATCH 475/615] Dynamic dispatch infrastructure style fixes --- src/AggregateFunctions/AggregateFunctionSum.h | 12 ++++++------ src/Common/TargetSpecific.h | 12 ++++++------ src/Functions/FunctionUnaryArithmetic.h | 11 +++++------ src/Functions/FunctionsComparison.h | 10 ++++++---- 
tests/performance/unary_arithmetic_functions.xml | 2 -- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index acff8e7b90f..03aeda1bb9b 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -59,11 +59,11 @@ struct AggregateFunctionSumData } /// Vectorized version - MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyImpl, - MULTITARGET_FH( + MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER( template void NO_SANITIZE_UNDEFINED NO_INLINE - ), /*addManyImpl*/ MULTITARGET_FB((const Value * __restrict ptr, size_t start, size_t end) /// NOLINT + ), addManyImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, size_t start, size_t end) /// NOLINT { ptr += start; size_t count = end - start; @@ -122,11 +122,11 @@ struct AggregateFunctionSumData addManyImpl(ptr, start, end); } - MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyConditionalInternalImpl, - MULTITARGET_FH( + MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER( template void NO_SANITIZE_UNDEFINED NO_INLINE - ), /*addManyConditionalInternalImpl*/ MULTITARGET_FB((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT + ), addManyConditionalInternalImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT { ptr += start; size_t count = end - start; diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 89c0f467fe3..67d9eb4831d 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -233,8 +233,8 @@ DECLARE_AVX512F_SPECIFIC_CODE( * class TestClass * { * public: - * MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(testFunctionImpl, - * MULTITARGET_FH(int), /\*testFunction*\/ MULTITARGET_FB((int value) + * MULTITARGET_FUNCTION_AVX2_SSE42( + * 
MULTITARGET_FUNCTION_HEADER(int), testFunctionImpl, MULTITARGET_FUNCTION_BODY((int value) * { * return value; * }) @@ -259,15 +259,15 @@ DECLARE_AVX512F_SPECIFIC_CODE( */ /// Function header -#define MULTITARGET_FH(...) __VA_ARGS__ +#define MULTITARGET_FUNCTION_HEADER(...) __VA_ARGS__ /// Function body -#define MULTITARGET_FB(...) __VA_ARGS__ +#define MULTITARGET_FUNCTION_BODY(...) __VA_ARGS__ #if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) /// NOLINTNEXTLINE -#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \ +#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \ FUNCTION_HEADER \ \ AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \ @@ -288,7 +288,7 @@ DECLARE_AVX512F_SPECIFIC_CODE( #else /// NOLINTNEXTLINE -#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \ +#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \ FUNCTION_HEADER \ \ name \ diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 4dc769b8177..445eb45fd9d 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -42,9 +42,8 @@ struct UnaryOperationImpl using ArrayA = typename ColVecA::Container; using ArrayC = typename ColVecC::Container; - MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorImpl, - MULTITARGET_FH( - static void NO_INLINE), /*vectorImpl*/ MULTITARGET_FB((const ArrayA & a, ArrayC & c) /// NOLINT + MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(static void NO_INLINE), vectorImpl, MULTITARGET_FUNCTION_BODY((const ArrayA & a, ArrayC & c) /// NOLINT { size_t size = a.size(); for (size_t i = 0; i < size; ++i) @@ -79,9 +78,9 @@ struct UnaryOperationImpl template struct FixedStringUnaryOperationImpl { - MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorImpl, - MULTITARGET_FH( - static void NO_INLINE), /*vectorImpl*/ MULTITARGET_FB((const ColumnFixedString::Chars & a, 
ColumnFixedString::Chars & c) /// NOLINT + MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(static void NO_INLINE), vectorImpl, MULTITARGET_FUNCTION_BODY((const ColumnFixedString::Chars & a, /// NOLINT + ColumnFixedString::Chars & c) { size_t size = a.size(); for (size_t i = 0; i < size; ++i) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 16575e551a7..7bbb1c1096c 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -85,8 +85,9 @@ struct NumComparisonImpl using ContainerA = PaddedPODArray; using ContainerB = PaddedPODArray; - MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorVectorImpl, - MULTITARGET_FH(static void), /*vectorVectorImpl*/ MULTITARGET_FB((const ContainerA & a, const ContainerB & b, PaddedPODArray & c) /// NOLINT + MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(static void), vectorVectorImpl, MULTITARGET_FUNCTION_BODY(( /// NOLINT + const ContainerA & a, const ContainerB & b, PaddedPODArray & c) { /** GCC 4.8.2 vectorizes a loop only if it is written in this form. 
* In this case, if you loop through the array index (the code will look simpler), @@ -127,8 +128,9 @@ struct NumComparisonImpl } - MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorConstantImpl, - MULTITARGET_FH(static void), /*vectorConstantImpl*/ MULTITARGET_FB((const ContainerA & a, B b, PaddedPODArray & c) /// NOLINT + MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(static void), vectorConstantImpl, MULTITARGET_FUNCTION_BODY(( /// NOLINT + const ContainerA & a, B b, PaddedPODArray & c) { size_t size = a.size(); const A * __restrict a_pos = a.data(); diff --git a/tests/performance/unary_arithmetic_functions.xml b/tests/performance/unary_arithmetic_functions.xml index 62e11457ac4..93dd5244c9b 100644 --- a/tests/performance/unary_arithmetic_functions.xml +++ b/tests/performance/unary_arithmetic_functions.xml @@ -1,6 +1,4 @@ - - func From 3ca7a8831b1b9fc8b8369d1f60243561961bd247 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 24 May 2022 14:26:06 +0200 Subject: [PATCH 476/615] Revert "fix deadlock during fetching part" This reverts commit 6ae8a26fae92f53efac49e8c37f009a6c4febb95. 
--- src/Storages/MergeTree/DataPartsExchange.cpp | 182 +++++++++---------- 1 file changed, 86 insertions(+), 96 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 8c59a1c00bc..064447c54ad 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -453,122 +453,112 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - bool retry_without_zero_copy = false; + PooledReadWriteBufferFromHTTP in{ + uri, + Poco::Net::HTTPRequest::HTTP_POST, + {}, + timeouts, + creds, + DBMS_DEFAULT_BUFFER_SIZE, + 0, /* no redirects */ + data_settings->replicated_max_parallel_fetches_for_host + }; + + int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); + + ReservationPtr reservation; + size_t sum_files_size = 0; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - PooledReadWriteBufferFromHTTP in{ - uri, - Poco::Net::HTTPRequest::HTTP_POST, - {}, - timeouts, - creds, - DBMS_DEFAULT_BUFFER_SIZE, - 0, /* no redirects */ - data_settings->replicated_max_parallel_fetches_for_host - }; - - int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); - - ReservationPtr reservation; - size_t sum_files_size = 0; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) + readBinary(sum_files_size, in); + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { - readBinary(sum_files_size, in); - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) + IMergeTreeDataPart::TTLInfos ttl_infos; + String ttl_infos_string; + readBinary(ttl_infos_string, in); + ReadBufferFromString ttl_infos_buffer(ttl_infos_string); + assertString("ttl format version: 1\n", ttl_infos_buffer); + ttl_infos.read(ttl_infos_buffer); + if (!disk) { - IMergeTreeDataPart::TTLInfos 
ttl_infos; - String ttl_infos_string; - readBinary(ttl_infos_string, in); - ReadBufferFromString ttl_infos_buffer(ttl_infos_string); - assertString("ttl format version: 1\n", ttl_infos_buffer); - ttl_infos.read(ttl_infos_buffer); - if (!disk) - { - reservation - = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); - if (!reservation) - reservation - = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); - } - } - else if (!disk) - { - reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); + reservation + = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); if (!reservation) - reservation = data.reserveSpace(sum_files_size); + reservation + = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); } } else if (!disk) { - /// We don't know real size of part because sender server version is too old - reservation = data.makeEmptyReservationOnLargestDisk(); + reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); + if (!reservation) + reservation = data.reserveSpace(sum_files_size); } - if (!disk) - disk = reservation->getDisk(); + } + else if (!disk) + { + /// We don't know real size of part because sender server version is too old + reservation = data.makeEmptyReservationOnLargestDisk(); + } + if (!disk) + disk = reservation->getDisk(); - bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch - && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); + bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch + && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); - String part_type = "Wide"; - if 
(server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, in); + String part_type = "Wide"; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) + readStringBinary(part_type, in); - UUID part_uuid = UUIDHelpers::Nil; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) - readUUIDText(part_uuid, in); + UUID part_uuid = UUIDHelpers::Nil; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) + readUUIDText(part_uuid, in); - String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); - if (!remote_fs_metadata.empty()) + String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); + if (!remote_fs_metadata.empty()) + { + if (!try_zero_copy) + throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); + if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")); + if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version); + if (part_type == "InMemory") + throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); + + try { - if (!try_zero_copy) - throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); - if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")); - if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) - throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version); - if (part_type == "InMemory") - throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); - - try - { - return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); - } - catch (const Exception & e) - { - if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR) - throw; - - LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); - /// Try again later but without zero-copy - retry_without_zero_copy = true; - } + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); } - else + catch (const Exception & e) { - auto storage_id = data.getStorageID(); - String new_part_path = part_type == "InMemory" ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; - auto entry = data.getContext()->getReplicatedFetchList().insert( - storage_id.getDatabaseName(), storage_id.getTableName(), - part_info.partition_id, part_name, new_part_path, - replica_path, uri, to_detached, sum_files_size); + if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR) + throw; - in.setNextCallback(ReplicatedFetchReadCallback(*entry)); - - size_t projections = 0; - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - readBinary(projections, in); - - MergeTreeData::DataPart::Checksums checksums; - return part_type == "InMemory" - ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); + LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); + /// Try again but without zero-copy + return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, + user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); } } - if (retry_without_zero_copy) - return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, - user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't fetch part and no retry. It is a bug."); + auto storage_id = data.getStorageID(); + String new_part_path = part_type == "InMemory" ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; + auto entry = data.getContext()->getReplicatedFetchList().insert( + storage_id.getDatabaseName(), storage_id.getTableName(), + part_info.partition_id, part_name, new_part_path, + replica_path, uri, to_detached, sum_files_size); + + in.setNextCallback(ReplicatedFetchReadCallback(*entry)); + + size_t projections = 0; + if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) + readBinary(projections, in); + + MergeTreeData::DataPart::Checksums checksums; + return part_type == "InMemory" + ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( From bdc537ead355fe55bef66c308194752ce6bfd4a6 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 24 May 2022 14:28:33 +0200 Subject: [PATCH 477/615] Column compareImpl devirtualize compare call --- src/Columns/IColumnImpl.h | 19 ++++++++----------- .../Transforms/PartialSortingTransform.cpp | 4 +--- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 7e7ff3c32d4..939127a54a1 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -81,7 +81,8 @@ void IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, if constexpr (use_indexes) { num_indexes = row_indexes->size(); - next_index = indexes = row_indexes->data(); + indexes = row_indexes->data(); + next_index = indexes; } compare_results.resize(num_rows); @@ -100,15 +101,8 @@ void IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, if constexpr (use_indexes) row = indexes[i]; - int res = compareAt(row, rhs_row_num, rhs, nan_direction_hint); - - /// We need to convert int to Int8. Sometimes comparison return values which do not fit in one byte. 
- if (res < 0) - compare_results[row] = -1; - else if (res > 0) - compare_results[row] = 1; - else - compare_results[row] = 0; + int res = static_cast(this)->compareAt(row, rhs_row_num, rhs, nan_direction_hint); + compare_results[row] = static_cast(res); if constexpr (reversed) compare_results[row] = -compare_results[row]; @@ -124,7 +118,10 @@ void IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, } if constexpr (use_indexes) - row_indexes->resize(next_index - row_indexes->data()); + { + size_t equal_row_indexes_size = next_index - row_indexes->data(); + row_indexes->resize(equal_row_indexes_size); + } } template diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 6a787a6cd15..3687fa770f0 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -81,9 +81,7 @@ size_t getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_ { /// Leave only rows that are less then row from rhs. 
filter[i] = compare_results[i] < 0; - - if (filter[i]) - ++result_size_hint; + result_size_hint += filter[i]; } return result_size_hint; From 9a19309e694a3cdb58bec152c2b511ddaf8a9b94 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 24 May 2022 14:46:29 +0200 Subject: [PATCH 478/615] Slightly better fix --- src/Storages/MergeTree/DataPartsExchange.cpp | 41 +++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 064447c54ad..0087a8892d4 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -453,29 +453,28 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - PooledReadWriteBufferFromHTTP in{ + std::unique_ptr in = std::make_unique( uri, Poco::Net::HTTPRequest::HTTP_POST, - {}, + nullptr, timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, 0, /* no redirects */ - data_settings->replicated_max_parallel_fetches_for_host - }; + static_cast(data_settings->replicated_max_parallel_fetches_for_host)); - int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); + int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); ReservationPtr reservation; size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - readBinary(sum_files_size, in); + readBinary(sum_files_size, *in); if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { IMergeTreeDataPart::TTLInfos ttl_infos; String ttl_infos_string; - readBinary(ttl_infos_string, in); + readBinary(ttl_infos_string, *in); ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); @@ -508,13 +507,13 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( String part_type = "Wide"; if 
(server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, in); + readStringBinary(part_type, *in); UUID part_uuid = UUIDHelpers::Nil; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) - readUUIDText(part_uuid, in); + readUUIDText(part_uuid, *in); - String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); + String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); if (!remote_fs_metadata.empty()) { if (!try_zero_copy) @@ -528,7 +527,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( try { - return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, *in, throttler); } catch (const Exception & e) { @@ -536,6 +535,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( throw; LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); + + /// It's important to release session from HTTP pool. Otherwise it's possible to get deadlock + /// on http pool. + try + { + in.reset(); + } + catch (...) 
+ { + tryLogCurrentException(log); + } + /// Try again but without zero-copy return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); @@ -549,16 +560,16 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( part_info.partition_id, part_name, new_part_path, replica_path, uri, to_detached, sum_files_size); - in.setNextCallback(ReplicatedFetchReadCallback(*entry)); + in->setNextCallback(ReplicatedFetchReadCallback(*entry)); size_t projections = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - readBinary(projections, in); + readBinary(projections, *in); MergeTreeData::DataPart::Checksums checksums; return part_type == "InMemory" - ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); + ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, *in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, *in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( From 86180614e737bd23c302c26d083b6e3edcb877df Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 24 May 2022 15:33:03 +0200 Subject: [PATCH 479/615] Fixed tests --- src/Functions/FunctionsExternalDictionaries.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index fc1bb07bda7..03833f9a2b2 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1183,7 +1183,7 @@ public: if (arguments.size() == 3) { - if (!isColumnConst(*arguments[2].column)) + if (!arguments[2].column || !isColumnConst(*arguments[2].column)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of third argument of function {}. 
Expected const unsigned integer.", getName()); From 6fb51e8bd32dd541ed82cd590f33e0fc31312da1 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 24 May 2022 17:06:06 +0200 Subject: [PATCH 480/615] Function hasAll added dynamic dispatch --- .../GatherUtils/sliceHasImplAnyAll.h | 32 +++++-------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 68f31006b4f..9933e0d2c5f 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -4,18 +4,11 @@ #include "Slices.h" #include "sliceEqualElements.h" -#if defined(__SSE4_2__) - #include - #include - #include -#endif - -#if defined(__AVX2__) - #include -#endif - #include +#if USE_MULTITARGET_CODE +#include +#endif namespace DB::GatherUtils { @@ -67,15 +60,14 @@ inline ALWAYS_INLINE bool hasAllIntegralLoopRemainder( return true; } - -#if defined(__AVX2__) +#if USE_MULTITARGET_CODE DECLARE_AVX2_SPECIFIC_CODE ( // AVX2 Int64, UInt64 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64( +bool sliceHasImplAnyAllImplInt64( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -165,7 +157,7 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64( // AVX2 Int32, UInt32 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32( +bool sliceHasImplAnyAllImplInt32( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -278,7 +270,7 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32( // AVX2 Int16, UInt16 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16( +bool sliceHasImplAnyAllImplInt16( const NumericArraySlice & first, const NumericArraySlice & second, const 
UInt8 * first_null_map, @@ -422,10 +414,6 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16( ) -#endif - -#if defined(__SSE4_2__) - DECLARE_SSE42_SPECIFIC_CODE ( // SSE4.2 Int64, UInt64 specialization @@ -895,8 +883,6 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAll(const FirstSliceType & first, const #if USE_MULTITARGET_CODE if constexpr (search_type == ArraySearchType::All && std::is_same_v) { - -#if defined(__AVX2__) if (isArchSupported(TargetArch::AVX2)) { if constexpr (std::is_same_v> || std::is_same_v>) @@ -912,9 +898,7 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAll(const FirstSliceType & first, const return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map); } } -#endif - - if (isArchSupported(TargetArch::SSE42)) + else if (isArchSupported(TargetArch::SSE42)) { if constexpr (std::is_same_v> || std::is_same_v>) { From e8b329595e52d860b03f8eeb368e7568a0302ac3 Mon Sep 17 00:00:00 2001 From: guykohen Date: Tue, 24 May 2022 12:18:58 -0400 Subject: [PATCH 481/615] Remove height restrictions from the query div in play web tool, and make sure width of the query box won't shrink below 100%. --- programs/server/play.html | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 06fc5d8de9a..fb33df0d320 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -129,14 +129,15 @@ #query_div { - /* Make enough space for even huge queries. */ - height: 20%; + /* Make enough space for medium/large queries but allowing query textarea to grow. */ + min-height: 20%; + display: grid; } #query { - height: 100%; - width: 100%; + /* Keeps query text-area's width full screen even when user adjusting the width of the query box. 
*/ + min-width: 100%; } #inputs From 59b4d4a643c24244d28a991a93346c9f8fd330bf Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 24 May 2022 21:08:30 +0300 Subject: [PATCH 482/615] ALTER COMMENT is now local-only operation and immediately observable --- .../ReplicatedMergeTreeTableMetadata.cpp | 22 +----------- .../ReplicatedMergeTreeTableMetadata.h | 6 +--- src/Storages/StorageReplicatedMergeTree.cpp | 36 ++++++++++++------- 3 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 393c2eb0dd1..eee046206ba 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -73,7 +73,6 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity_bytes = 0; constraints = metadata_snapshot->getConstraints().toString(); - comment = metadata_snapshot->comment; } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const @@ -109,9 +108,6 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!constraints.empty()) out << "constraints: " << constraints << "\n"; - - if (!comment.empty()) - out << "comment: " << quote << comment << "\n"; } String ReplicatedMergeTreeTableMetadata::toString() const @@ -159,18 +155,8 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) else index_granularity_bytes = 0; - String verb; - readStringUntilWhitespace(verb, in); - - if (verb == "constraints:") - { + if (checkString("constraints: ", in)) in >> " " >> constraints >> "\n"; - - readStringUntilWhitespace(verb, in); - } - - if (verb == "comment:") - in >> " " >> quote >> comment >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -364,12 +350,6 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl 
diff.new_constraints = from_zk.constraints; } - if (comment != from_zk.comment) - { - diff.comment_changed = true; - diff.comment = from_zk.comment; - } - return diff; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 246cf863d13..6d510d20304 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -30,7 +30,6 @@ struct ReplicatedMergeTreeTableMetadata String projections; String constraints; String ttl_table; - String comment; UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; @@ -62,13 +61,10 @@ struct ReplicatedMergeTreeTableMetadata bool ttl_table_changed = false; String new_ttl_table; - bool comment_changed = false; - String comment; - bool empty() const { return !sorting_key_changed && !sampling_expression_changed && !skip_indices_changed && !projections_changed - && !ttl_table_changed && !constraints_changed && !comment_changed; + && !ttl_table_changed && !constraints_changed; } }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a72866d1dde..7fc9e6acf59 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1152,8 +1152,6 @@ void StorageReplicatedMergeTree::setTableStructure( } } - if (metadata_diff.comment_changed) - new_metadata.comment = metadata_diff.comment; } /// Changes in columns may affect following metadata fields @@ -1201,6 +1199,7 @@ void StorageReplicatedMergeTree::setTableStructure( auto table_id = getStorageID(); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(getContext(), table_id, new_metadata); + setInMemoryMetadata(new_metadata); } @@ -4717,9 +4716,9 @@ void StorageReplicatedMergeTree::alter( auto table_id = getStorageID(); - if (commands.isSettingsAlter()) + if (commands.isSettingsAlter() || 
commands.isCommentAlter()) { - /// We don't replicate storage_settings_ptr ALTER. It's local operation. + /// We don't replicate storage_settings_ptr or table comment ALTER. Those are local operations. /// Also we don't upgrade alter lock to table structure lock. StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); commands.apply(future_metadata, query_context); @@ -4729,6 +4728,7 @@ void StorageReplicatedMergeTree::alter( changeSettings(future_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); + setInMemoryMetadata(future_metadata); return; } @@ -4779,11 +4779,6 @@ void StorageReplicatedMergeTree::alter( future_metadata_in_zk.ttl_table = ""; } - if (future_metadata.comment != current_metadata->comment) - { - future_metadata_in_zk.comment = future_metadata.comment; - } - String new_indices_str = future_metadata.secondary_indices.toString(); if (new_indices_str != current_metadata->secondary_indices.toString()) future_metadata_in_zk.skip_indices = new_indices_str; @@ -4806,13 +4801,26 @@ void StorageReplicatedMergeTree::alter( String new_columns_str = future_metadata.columns.toString(); ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "columns", new_columns_str, -1)); - if (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)) + // Local-only operations. 
+ const bool settings_changed = ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes); + const bool comment_changed = current_metadata->comment != future_metadata.comment; + + if (settings_changed || comment_changed) { - /// Just change settings StorageInMemoryMetadata metadata_copy = *current_metadata; - metadata_copy.settings_changes = future_metadata.settings_changes; - changeSettings(metadata_copy.settings_changes, table_lock_holder); + + if (comment_changed) + metadata_copy.comment = future_metadata.comment; + + if (settings_changed) + { + /// Just change settings + metadata_copy.settings_changes = future_metadata.settings_changes; + changeSettings(metadata_copy.settings_changes, table_lock_holder); + } + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata_copy); + setInMemoryMetadata(metadata_copy); } /// We can be sure, that in case of successful commit in zookeeper our @@ -4870,7 +4878,9 @@ void StorageReplicatedMergeTree::alter( String metadata_zk_path = fs::path(txn->getDatabaseZooKeeperPath()) / "metadata" / escapeForFileName(table_id.table_name); auto ast = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name, query_context); applyMetadataChangesToCreateQuery(ast, future_metadata); + setInMemoryMetadata(future_metadata); ops.emplace_back(zkutil::makeSetRequest(metadata_zk_path, getObjectDefinitionFromCreateQuery(ast), -1)); + } Coordination::Responses results; From 028f15c4fa728625817498f15f018eb652de0bcf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 13:22:56 +0200 Subject: [PATCH 483/615] Review comment: Throw LOGICAL_ERROR for different sizes of haystack / needles --- src/Functions/MatchImpl.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 5e7efd0a94a..54aaa3116fd 100644 --- 
a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -18,6 +18,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -410,7 +411,7 @@ struct MatchImpl const size_t haystack_size = haystack_offsets.size(); if (haystack_size != needle_offset.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + throw Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' unexpectedly received a different number of haystacks and needles", name); if (start_pos_ != nullptr) @@ -527,7 +528,7 @@ struct MatchImpl const size_t haystack_size = haystack.size()/N; if (haystack_size != needle_offset.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + throw Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' unexpectedly received a different number of haystacks and needles", name); if (start_pos_ != nullptr) From 13bdabce3aea3596a241489917bdd8e58f965f6d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 24 May 2022 21:21:12 +0300 Subject: [PATCH 484/615] Update 02302_join_auto_lc_nullable_bug.sql --- tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql index 7f7285d5472..469476c82bf 100644 --- a/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql +++ b/tests/queries/0_stateless/02302_join_auto_lc_nullable_bug.sql @@ -1,3 +1,4 @@ +-- Tags: no-backward-compatibility-check SET max_bytes_in_join = '100', join_algorithm = 'auto'; From 96833b8696a81d2ede5972fcb218b2dcb581bc82 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 24 May 2022 20:41:48 +0200 Subject: [PATCH 485/615] ColumnImpl compareImpl added assert for compare result --- src/Columns/IColumnImpl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h 
index 939127a54a1..e90503cbad2 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -102,6 +102,7 @@ void IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, row = indexes[i]; int res = static_cast(this)->compareAt(row, rhs_row_num, rhs, nan_direction_hint); + assert(res == 1 || res == -1 || res == 0); compare_results[row] = static_cast(res); if constexpr (reversed) From c4dc0f7cda771480fd9bd2ea49e49f31df638171 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 24 May 2022 18:56:22 +0000 Subject: [PATCH 486/615] Fix ORDER BY optimization in case of GROUPING SETS --- src/Interpreters/TreeOptimizer.cpp | 23 +++++++++++++++---- .../02304_grouping_set_order_by.reference | 8 +++++++ .../02304_grouping_set_order_by.sql | 11 +++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02304_grouping_set_order_by.reference create mode 100644 tests/queries/0_stateless/02304_grouping_set_order_by.sql diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 537c18beaa1..42628fbb905 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -459,11 +459,26 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context std::unordered_set group_by_hashes; if (auto group_by = select_query->groupBy()) { - for (auto & elem : group_by->children) + if (select_query->group_by_with_grouping_sets) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - group_by_hashes.insert(key); + for (auto & set : group_by->children) + { + for (auto & elem : set->children) + { + auto hash = elem->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + group_by_hashes.insert(key); + } + } + } + else + { + for (auto & elem : group_by->children) + { + auto hash = elem->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + 
group_by_hashes.insert(key); + } } } diff --git a/tests/queries/0_stateless/02304_grouping_set_order_by.reference b/tests/queries/0_stateless/02304_grouping_set_order_by.reference new file mode 100644 index 00000000000..c1d666e9167 --- /dev/null +++ b/tests/queries/0_stateless/02304_grouping_set_order_by.reference @@ -0,0 +1,8 @@ +2020-01-01 00:00:00 60 +2020-01-01 00:00:00 id0 20 +2020-01-01 00:00:00 id1 20 +2020-01-01 00:00:00 id2 20 +2020-01-01 01:00:00 40 +2020-01-01 01:00:00 id0 14 +2020-01-01 01:00:00 id1 13 +2020-01-01 01:00:00 id2 13 diff --git a/tests/queries/0_stateless/02304_grouping_set_order_by.sql b/tests/queries/0_stateless/02304_grouping_set_order_by.sql new file mode 100644 index 00000000000..d1b4ab13446 --- /dev/null +++ b/tests/queries/0_stateless/02304_grouping_set_order_by.sql @@ -0,0 +1,11 @@ +SELECT toStartOfHour(time) AS timex, id, count() +FROM +( + SELECT + concat('id', toString(number % 3)) AS id, + toDateTime('2020-01-01') + (number * 60) AS time + FROM numbers(100) +) +GROUP BY + GROUPING SETS ( (timex, id), (timex)) +ORDER BY timex ASC, id; From 72623c2e14cde56afd1e5da5a91e40b52848d8a6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 22:08:46 +0200 Subject: [PATCH 487/615] Add Graviton 3 vs Graviton 2 comparison --- website/benchmark/hardware/results/aws_c6g_16xlarge.json | 0 website/benchmark/hardware/results/aws_c7g_16xlarge.json | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 website/benchmark/hardware/results/aws_c6g_16xlarge.json create mode 100644 website/benchmark/hardware/results/aws_c7g_16xlarge.json diff --git a/website/benchmark/hardware/results/aws_c6g_16xlarge.json b/website/benchmark/hardware/results/aws_c6g_16xlarge.json new file mode 100644 index 00000000000..e69de29bb2d diff --git a/website/benchmark/hardware/results/aws_c7g_16xlarge.json b/website/benchmark/hardware/results/aws_c7g_16xlarge.json new file mode 100644 index 00000000000..e69de29bb2d From 
de6506819e8e22824e7400b1c69a1a9a070b33e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 22:10:23 +0200 Subject: [PATCH 488/615] Add Graviton 3 vs Graviton 2 comparison --- .../hardware/results/aws_c6g_16xlarge.json | 54 +++++++++++++++++++ .../hardware/results/aws_c7g_16xlarge.json | 54 +++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/website/benchmark/hardware/results/aws_c6g_16xlarge.json b/website/benchmark/hardware/results/aws_c6g_16xlarge.json index e69de29bb2d..a2e46fd9172 100644 --- a/website/benchmark/hardware/results/aws_c6g_16xlarge.json +++ b/website/benchmark/hardware/results/aws_c6g_16xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS c6g.16xlarge (Graviton 2)", + "system_full": "AWS c6g.16xlarge (Graviton 2) 64 vCPU, 128 GiB RAM, EBS", + "time": "2022-05-24 00:00:00", + "kind": "cloud", + "result": + [ +[0.002, 0.002, 0.002], +[0.032, 0.021, 0.021], +[0.108, 0.022, 0.023], +[0.311, 0.054, 0.057], +[0.314, 0.068, 0.069], +[0.858, 0.099, 0.098], +[0.002, 0.002, 0.002], +[0.034, 0.031, 0.031], +[0.417, 0.114, 0.112], +[0.798, 0.126, 0.127], +[0.427, 0.071, 0.071], +[0.597, 0.074, 0.072], +[1.030, 0.148, 0.144], +[1.725, 0.188, 0.186], +[0.867, 0.173, 0.173], +[0.338, 0.209, 0.181], +[1.688, 0.435, 0.445], +[1.348, 0.344, 0.311], +[3.109, 0.851, 0.767], +[0.230, 0.059, 0.080], +[9.048, 0.285, 0.234], +[10.136, 0.238, 0.223], +[19.584, 0.770, 0.740], +[16.139, 1.185, 1.202], +[1.832, 0.085, 0.108], +[0.888, 0.063, 0.060], +[2.460, 0.108, 0.110], +[9.312, 0.255, 0.242], +[7.396, 0.289, 0.284], +[0.822, 0.855, 0.865], +[1.304, 0.151, 0.144], +[4.695, 0.278, 0.212], +[3.688, 1.301, 1.427], +[9.189, 0.824, 0.818], +[9.126, 0.812, 0.777], +[0.309, 0.291, 0.331], +[0.209, 0.166, 0.142], +[0.087, 0.066, 0.083], +[0.114, 0.076, 0.072], +[0.517, 0.345, 0.368], +[0.072, 0.028, 0.019], +[0.047, 0.016, 0.030], +[0.006, 0.022, 0.019] + ] + } +] diff --git a/website/benchmark/hardware/results/aws_c7g_16xlarge.json 
b/website/benchmark/hardware/results/aws_c7g_16xlarge.json index e69de29bb2d..91230ecceee 100644 --- a/website/benchmark/hardware/results/aws_c7g_16xlarge.json +++ b/website/benchmark/hardware/results/aws_c7g_16xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS c7g.16xlarge (Graviton 3)", + "system_full": "AWS c7g.16xlarge (Graviton 3) 64 vCPU, 128 GiB RAM, EBS", + "time": "2022-05-24 00:00:00", + "kind": "cloud", + "result": + [ +[0.002, 0.002, 0.002], +[0.031, 0.022, 0.023], +[0.066, 0.025, 0.025], +[0.240, 0.061, 0.059], +[0.328, 0.073, 0.076], +[0.955, 0.101, 0.098], +[0.002, 0.002, 0.002], +[0.035, 0.030, 0.030], +[0.499, 0.113, 0.115], +[0.704, 0.127, 0.127], +[0.452, 0.070, 0.070], +[0.613, 0.074, 0.072], +[1.060, 0.147, 0.144], +[1.749, 0.190, 0.187], +[0.933, 0.176, 0.175], +[0.408, 0.206, 0.188], +[1.714, 0.476, 0.464], +[1.391, 0.349, 0.307], +[3.271, 0.876, 0.719], +[0.375, 0.079, 0.071], +[9.094, 0.270, 0.293], +[10.251, 0.236, 0.222], +[19.763, 0.783, 0.839], +[16.380, 1.164, 1.192], +[1.861, 0.112, 0.114], +[0.863, 0.062, 0.060], +[2.499, 0.103, 0.113], +[9.448, 0.257, 0.245], +[7.546, 0.288, 0.285], +[0.822, 0.837, 0.837], +[1.352, 0.151, 0.142], +[4.743, 0.224, 0.214], +[3.807, 1.236, 1.366], +[10.096, 0.805, 0.780], +[9.191, 0.830, 0.792], +[0.320, 0.304, 0.294], +[0.209, 0.143, 0.175], +[0.099, 0.066, 0.068], +[0.141, 0.073, 0.064], +[0.499, 0.386, 0.372], +[0.061, 0.030, 0.032], +[0.035, 0.030, 0.028], +[0.016, 0.016, 0.004] + ] + } +] From 919a0ae8353862a6f07f507e1b0f29fe2e1c52d9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 22:14:13 +0200 Subject: [PATCH 489/615] Fix trash --- docs/tools/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index b6f2d4549e5..afd6b1a889d 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -2,7 +2,6 @@ Babel==2.9.1 Jinja2==3.0.3 Markdown==3.3.2 MarkupSafe==2.1.1 -MarkupSafe==2.1.1 
PyYAML==6.0 Pygments>=2.12.0 beautifulsoup4==4.9.1 @@ -18,7 +17,6 @@ mkdocs-material==8.2.15 mkdocs==1.3.0 mkdocs_material_extensions==1.0.3 packaging==21.3 -pygments==2.12.0 pymdown_extensions==9.4 pyparsing==3.0.9 python-slugify==4.0.1 From 9fc935d062358f488272223d5722223dbd7c977f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 22:21:02 +0200 Subject: [PATCH 490/615] Correct copy-paste --- .../hardware/results/aws_c6g_16xlarge.json | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/website/benchmark/hardware/results/aws_c6g_16xlarge.json b/website/benchmark/hardware/results/aws_c6g_16xlarge.json index a2e46fd9172..364b40f657a 100644 --- a/website/benchmark/hardware/results/aws_c6g_16xlarge.json +++ b/website/benchmark/hardware/results/aws_c6g_16xlarge.json @@ -7,48 +7,48 @@ "result": [ [0.002, 0.002, 0.002], -[0.032, 0.021, 0.021], -[0.108, 0.022, 0.023], -[0.311, 0.054, 0.057], -[0.314, 0.068, 0.069], -[0.858, 0.099, 0.098], -[0.002, 0.002, 0.002], -[0.034, 0.031, 0.031], -[0.417, 0.114, 0.112], -[0.798, 0.126, 0.127], -[0.427, 0.071, 0.071], -[0.597, 0.074, 0.072], -[1.030, 0.148, 0.144], -[1.725, 0.188, 0.186], -[0.867, 0.173, 0.173], -[0.338, 0.209, 0.181], -[1.688, 0.435, 0.445], -[1.348, 0.344, 0.311], -[3.109, 0.851, 0.767], -[0.230, 0.059, 0.080], -[9.048, 0.285, 0.234], -[10.136, 0.238, 0.223], -[19.584, 0.770, 0.740], -[16.139, 1.185, 1.202], -[1.832, 0.085, 0.108], -[0.888, 0.063, 0.060], -[2.460, 0.108, 0.110], -[9.312, 0.255, 0.242], -[7.396, 0.289, 0.284], -[0.822, 0.855, 0.865], -[1.304, 0.151, 0.144], -[4.695, 0.278, 0.212], -[3.688, 1.301, 1.427], -[9.189, 0.824, 0.818], -[9.126, 0.812, 0.777], -[0.309, 0.291, 0.331], -[0.209, 0.166, 0.142], -[0.087, 0.066, 0.083], -[0.114, 0.076, 0.072], -[0.517, 0.345, 0.368], -[0.072, 0.028, 0.019], -[0.047, 0.016, 0.030], -[0.006, 0.022, 0.019] +[0.051, 0.024, 0.026], +[0.037, 0.021, 0.028], +[0.102, 0.065, 0.061], +[0.243, 0.080, 0.080], +[0.976, 
0.138, 0.138], +[0.003, 0.002, 0.002], +[0.044, 0.040, 0.039], +[0.204, 0.145, 0.146], +[0.799, 0.165, 0.165], +[0.306, 0.095, 0.095], +[0.523, 0.101, 0.096], +[0.973, 0.226, 0.224], +[1.520, 0.282, 0.277], +[0.645, 0.239, 0.236], +[0.260, 0.312, 0.280], +[1.535, 0.660, 0.629], +[1.426, 0.470, 0.427], +[3.456, 1.372, 1.138], +[0.147, 0.119, 0.079], +[9.101, 0.406, 0.358], +[10.117, 0.330, 0.323], +[19.495, 0.756, 0.748], +[16.173, 1.500, 1.532], +[1.832, 0.105, 0.094], +[0.836, 0.092, 0.090], +[2.363, 0.108, 0.099], +[9.269, 0.367, 0.363], +[7.317, 0.422, 0.414], +[0.918, 1.020, 1.058], +[1.347, 0.210, 0.209], +[4.535, 0.343, 0.335], +[4.288, 2.411, 2.501], +[9.310, 1.240, 1.172], +[9.301, 1.209, 1.205], +[0.446, 0.428, 0.421], +[0.245, 0.207, 0.202], +[0.107, 0.091, 0.098], +[0.112, 0.095, 0.101], +[0.546, 0.485, 0.444], +[0.061, 0.049, 0.037], +[0.041, 0.035, 0.033], +[0.006, 0.005, 0.005] ] } ] From 60f060a3bb43ec9f19eba979091bccbd08fdd775 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 22:21:40 +0200 Subject: [PATCH 491/615] Fix README --- website/README.md | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/website/README.md b/website/README.md index 57cd87cbfe2..f96f1e0075d 100644 --- a/website/README.md +++ b/website/README.md @@ -22,19 +22,7 @@ pip3 install -r requirements.txt virtualenv build -./build.py --skip-multi-page --skip-blog --skip-docs --livereload 8080 +./build.py --livereload 8080 # Open the web browser and go to http://localhost:8080/ ``` - -# How to quickly test the blog - -``` -./build.py --skip-multi-page --skip-docs --livereload 8080 -``` - -# How to quickly test the broken links in docs - -``` -./build.py --skip-multi-page --skip-blog --lang en --livereload 8080 -``` From 94e693f96e2fcaaffe61ffcc84a03f437dfd6aab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 24 May 2022 23:40:10 +0200 Subject: [PATCH 492/615] Fix Docker --- docker/server/Dockerfile.ubuntu | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index d5f1a3929b7..9e063a006b6 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -16,12 +16,12 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list gnupg \ locales \ wget \ - tzdata + tzdata \ && apt-get clean ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION=22.1.1.* +ARG VERSION=22.5.1.* ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image From d773bd610d08953464ed31f2dcaeeed231378f46 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 25 May 2022 00:58:57 +0300 Subject: [PATCH 493/615] =?UTF-8?q?Revert=20"Remove=20height=20restriction?= =?UTF-8?q?s=20from=20the=20query=20div=20in=20play=20web=20tool,=20and=20?= =?UTF-8?q?m=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- programs/server/play.html | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 69ef616db09..6b530790ad0 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -148,15 +148,13 @@ #query_div { - /* Make enough space for medium/large queries but allowing query textarea to grow. */ - min-height: 20%; - display: grid; + height: 100%; } #query { - /* Keeps query text-area's width full screen even when user adjusting the width of the query box. */ - min-width: 100%; + height: 100%; + width: 100%; } #inputs From fbe16a1d33dfd71a387543bf51e4da105b55c290 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 24 May 2022 20:15:04 +0200 Subject: [PATCH 494/615] Don't compare tags to testing by default --- utils/changelog/changelog.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index c15a1600506..d616d842a4f 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -173,6 +173,11 @@ def parse_args() -> argparse.Namespace: "--gh-password", help="a password that should be used when user is given", ) + parser.add_argument( + "--with-testing-tags", + action="store_true", + help="by default '*-testing' tags are ignored, this argument enables them too", + ) parser.add_argument( "--from", dest="from_ref", @@ -296,7 +301,7 @@ def write_changelog(fd: TextIO, descriptions: Dict[str, List[Description]]): fd.write("\n") -def check_refs(from_ref: Optional[str], to_ref: str): +def check_refs(from_ref: Optional[str], to_ref: str, with_testing_tags: bool): global FROM_REF, TO_REF TO_REF = to_ref @@ -306,10 +311,13 @@ def check_refs(from_ref: Optional[str], to_ref: str): # Check from_ref if from_ref is None: # Get all tags pointing to TO_REF - tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'") + tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'").split("\n") logging.info("All tags pointing to %s:\n%s", TO_REF, tags) - exclude = " ".join([f"--exclude='{tag}'" for tag in tags.split("\n")]) - FROM_REF = runner.run(f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'") + if not with_testing_tags: + tags.append("*-testing") + exclude = " ".join([f"--exclude='{tag}'" for tag in tags]) + cmd = f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'" + FROM_REF = runner.run(cmd) else: runner.run(f"git rev-parse {FROM_REF}") FROM_REF = from_ref @@ -336,7 +344,7 @@ def main(): logging.info("Fetching all tags") runner.run("git fetch --tags", stderr=DEVNULL) - check_refs(args.from_ref, args.to_ref) + check_refs(args.from_ref, args.to_ref, 
args.with_testing_tags) set_sha_in_changelog() logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF) From 4b28ea92cac81bcf80a4ba594e73014bdb1e726f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 24 May 2022 20:15:49 +0200 Subject: [PATCH 495/615] Use commit date for cherry-picked commits (later) --- utils/changelog/changelog.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index d616d842a4f..367cfd056c1 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -352,9 +352,10 @@ def main(): # Get starting and ending dates for gathering PRs # Add one day after and before to mitigate TZ possible issues # `tag^{}` format gives commit ref when we have annotated tags - from_date = runner.run(f"git log -1 --format=format:%as '{FROM_REF}^{{}}'") + # format %cs gives a committer date, works better for cherry-picked commits + from_date = runner.run(f"git log -1 --format=format:%cs '{FROM_REF}^{{}}'") from_date = (date.fromisoformat(from_date) - timedelta(1)).isoformat() - to_date = runner.run(f"git log -1 --format=format:%as '{TO_REF}^{{}}'") + to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'") to_date = (date.fromisoformat(to_date) + timedelta(1)).isoformat() # Get all PRs for the given time frame From 0e494c9ee71ca0a515d6c8b95f7b3e19e3f75441 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 24 May 2022 20:18:27 +0200 Subject: [PATCH 496/615] Add caching for GitHub PR objects --- utils/changelog/.gitignore | 1 + utils/changelog/changelog.py | 55 ++++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/utils/changelog/.gitignore b/utils/changelog/.gitignore index 9ab24b6c8b8..f0a3171ed34 100644 --- a/utils/changelog/.gitignore +++ b/utils/changelog/.gitignore @@ -1,3 +1,4 @@ *.md *.txt *.json +gh_cache diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index 367cfd056c1..c20c6cfd072 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -3,8 +3,10 @@ import argparse import logging +import os.path as p +import os import re -from datetime import date, timedelta +from datetime import date, datetime, timedelta from queue import Empty, Queue from subprocess import CalledProcessError, DEVNULL from threading import Thread @@ -13,6 +15,7 @@ from typing import Dict, List, Optional, TextIO from fuzzywuzzy.fuzz import ratio # type: ignore from github import Github from github.NamedUser import NamedUser +from github.Issue import Issue from github.PullRequest import PullRequest from github.Repository import Repository from git_helper import is_shallow, git_runner as runner @@ -34,6 +37,8 @@ categories_preferred_order = ( FROM_REF = "" TO_REF = "" SHA_IN_CHANGELOG = [] # type: List[str] +GitHub = Github() +CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") class Description: @@ -87,10 +92,10 @@ class Worker(Thread): def run(self): while not self.queue.empty(): try: - number = self.queue.get() + issue = self.queue.get() # type: Issue except Empty: break # possible race condition, just continue - api_pr = self.repo.get_pull(number) + api_pr = get_pull_cached(self.repo, issue.number, issue.updated_at) in_changelog = False merge_commit = api_pr.merge_commit_sha try: @@ -109,13 +114,31 @@ class Worker(Thread): self.queue.task_done() +def 
get_pull_cached( + repo: Repository, number: int, updated_at: Optional[datetime] = None +) -> PullRequest: + pr_cache_file = p.join(CACHE_PATH, f"{number}.pickle") + if updated_at is None: + updated_at = datetime.now() - timedelta(hours=-1) + + if p.isfile(pr_cache_file): + cache_updated = datetime.fromtimestamp(p.getmtime(pr_cache_file)) + if cache_updated > updated_at: + with open(pr_cache_file, "rb") as prfd: + return GitHub.load(prfd) # type: ignore + pr = repo.get_pull(number) + with open(pr_cache_file, "wb") as prfd: + GitHub.dump(pr, prfd) # type: ignore + return pr + + def get_descriptions( - repo: Repository, numbers: List[int], jobs: int + repo: Repository, issues: List[Issue], jobs: int ) -> Dict[str, List[Description]]: workers = [] # type: List[Worker] - queue = Queue() # type: Queue # (!?!?!?!??!) - for number in numbers: - queue.put(number) + queue = Queue() # type: Queue[Issue] + for issue in issues: + queue.put(issue) for _ in range(jobs): worker = Worker(queue, repo) worker.start() @@ -200,7 +223,10 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri if item.head.ref.startswith("backport/"): branch_parts = item.head.ref.split("/") if len(branch_parts) == 3: - item = repo.get_pull(int(branch_parts[-1])) + try: + item = get_pull_cached(repo, int(branch_parts[-1])) + except Exception as e: + logging.warning("unable to get backpoted PR, exception: %s", e) else: logging.warning( "The branch %s doesn't match backport template, using PR %s as is", @@ -337,6 +363,10 @@ def main(): format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s", level=log_levels[min(args.verbose, 3)], ) + # Create a cache directory + if not p.isdir(CACHE_PATH): + os.mkdir(CACHE_PATH, 0o700) + # Get the full repo if is_shallow(): logging.info("Unshallow repository") @@ -359,15 +389,16 @@ def main(): to_date = (date.fromisoformat(to_date) + timedelta(1)).isoformat() # Get all PRs for the given time frame - gh = Github( + global 
GitHub + GitHub = Github( args.gh_user_or_token, args.gh_password, per_page=100, pool_size=args.jobs ) query = f"type:pr repo:{args.repo} is:merged merged:{from_date}..{to_date}" - repo = gh.get_repo(args.repo) - api_prs = gh.search_issues(query=query, sort="created") + repo = GitHub.get_repo(args.repo) + api_prs = GitHub.search_issues(query=query, sort="created") logging.info("Found %s PRs for the query: '%s'", api_prs.totalCount, query) - pr_numbers = [pr.number for pr in api_prs] + pr_numbers = list(api_prs) descriptions = get_descriptions(repo, pr_numbers, args.jobs) From cd9486b103e34c23baa0685050bd73df20173cc3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 24 May 2022 21:26:40 +0200 Subject: [PATCH 497/615] Update changed descriptions for v22.1.1.2542-prestable.md and v22.1.3.7-stable.md --- docs/changelogs/v22.1.1.2542-prestable.md | 2 +- docs/changelogs/v22.1.3.7-stable.md | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/changelogs/v22.1.1.2542-prestable.md b/docs/changelogs/v22.1.1.2542-prestable.md index f6418c5c3b9..e1e30e28ec6 100644 --- a/docs/changelogs/v22.1.1.2542-prestable.md +++ b/docs/changelogs/v22.1.1.2542-prestable.md @@ -6,7 +6,7 @@ * Add `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). The functions `left` and `right` were previously implemented in parser. Upgrade notes: distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
#### New Feature -* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). +* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_of_defaults_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). * add grouping sets function, like GROUP BY grouping sets (a, b, (a, b)). [#26869](https://github.com/ClickHouse/ClickHouse/pull/26869) ([taylor12805](https://github.com/taylor12805)). * Added an ability to read from all replicas within a shard during distributed query. To enable this, set `allow_experimental_parallel_reading_from_replicas=true` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Start and stop servers when hosts and ports configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)). 
diff --git a/docs/changelogs/v22.1.3.7-stable.md b/docs/changelogs/v22.1.3.7-stable.md index ffb0ec6048d..f8bb8821031 100644 --- a/docs/changelogs/v22.1.3.7-stable.md +++ b/docs/changelogs/v22.1.3.7-stable.md @@ -1,2 +1,9 @@ ### ClickHouse release v22.1.3.7-stable FIXME as compared to v22.1.2.2-stable +#### Improvement +* Backported in [#33793](https://github.com/ClickHouse/ClickHouse/issues/33793): Create parent directories in DiskS3::restoreFileOperations method. [#33730](https://github.com/ClickHouse/ClickHouse/pull/33730) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#33898](https://github.com/ClickHouse/ClickHouse/issues/33898): Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). + From 352c7d6c21cdc9324b7aa2e477bb04c5c3593cc7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 24 May 2022 21:54:40 +0200 Subject: [PATCH 498/615] Add changelogs for 2021 --- docs/changelogs/v21.1.1.5646-prestable.md | 259 +++++++++++++++++++++ docs/changelogs/v21.1.2.15-stable.md | 28 +++ docs/changelogs/v21.1.3.32-stable.md | 31 +++ docs/changelogs/v21.1.4.46-stable.md | 22 ++ docs/changelogs/v21.1.5.4-stable.md | 12 + docs/changelogs/v21.1.6.13-stable.md | 12 + docs/changelogs/v21.1.7.1-stable.md | 11 + docs/changelogs/v21.1.8.30-stable.md | 29 +++ docs/changelogs/v21.1.9.41-stable.md | 17 ++ docs/changelogs/v21.10.1.8013-prestable.md | 138 +++++++++++ docs/changelogs/v21.10.2.15-stable.md | 65 ++++++ docs/changelogs/v21.10.3.9-stable.md | 45 ++++ docs/changelogs/v21.10.4.26-stable.md | 25 ++ docs/changelogs/v21.10.5.3-stable.md | 15 ++ docs/changelogs/v21.10.6.2-stable.md | 22 ++ docs/changelogs/v21.11.1.8636-prestable.md | 196 ++++++++++++++++ docs/changelogs/v21.11.10.1-stable.md | 2 + docs/changelogs/v21.11.11.1-stable.md | 6 + docs/changelogs/v21.11.2.2-stable.md | 8 + docs/changelogs/v21.11.3.6-stable.md | 10 + docs/changelogs/v21.11.4.14-stable.md | 17 ++ docs/changelogs/v21.11.5.33-stable.md | 33 +++ docs/changelogs/v21.11.6.7-stable.md | 24 ++ docs/changelogs/v21.11.7.9-stable.md | 15 ++ docs/changelogs/v21.11.8.4-stable.md | 12 + docs/changelogs/v21.11.9.1-stable.md | 6 + docs/changelogs/v21.12.1.9017-prestable.md | 206 ++++++++++++++++ docs/changelogs/v21.12.2.17-stable.md | 22 ++ docs/changelogs/v21.12.3.32-stable.md | 17 ++ docs/changelogs/v21.12.4.1-stable.md | 13 ++ docs/changelogs/v21.2.1.5869-prestable.md | 156 +++++++++++++ docs/changelogs/v21.2.10.48-stable.md | 11 + docs/changelogs/v21.2.2.8-stable.md | 166 +++++++++++++ docs/changelogs/v21.2.3.15-stable.md | 19 ++ docs/changelogs/v21.2.4.6-stable.md | 12 + docs/changelogs/v21.2.5.5-stable.md | 12 + docs/changelogs/v21.2.6.1-stable.md | 12 + docs/changelogs/v21.2.7.11-stable.md | 12 + docs/changelogs/v21.2.8.31-stable.md | 30 +++ 
docs/changelogs/v21.2.9.41-stable.md | 17 ++ docs/changelogs/v21.3.1.6185-prestable.md | 159 +++++++++++++ docs/changelogs/v21.3.10.1-lts.md | 8 + docs/changelogs/v21.3.11.5-lts.md | 9 + docs/changelogs/v21.3.12.2-lts.md | 12 + docs/changelogs/v21.3.13.9-lts.md | 43 ++++ docs/changelogs/v21.3.14.1-lts.md | 10 + docs/changelogs/v21.3.15.4-stable.md | 6 + docs/changelogs/v21.3.16.5-lts.md | 25 ++ docs/changelogs/v21.3.17.2-lts.md | 12 + docs/changelogs/v21.3.18.4-lts.md | 22 ++ docs/changelogs/v21.3.19.1-lts.md | 26 +++ docs/changelogs/v21.3.2.5-lts.md | 161 +++++++++++++ docs/changelogs/v21.3.20.1-lts.md | 10 + docs/changelogs/v21.3.3.14-lts.md | 11 + docs/changelogs/v21.3.4.25-lts.md | 18 ++ docs/changelogs/v21.3.5.42-lts.md | 27 +++ docs/changelogs/v21.3.6.55-lts.md | 19 ++ docs/changelogs/v21.3.7.62-stable.md | 12 + docs/changelogs/v21.3.8.76-lts.md | 21 ++ docs/changelogs/v21.3.9.83-lts.md | 15 ++ docs/changelogs/v21.4.1.6422-prestable.md | 173 ++++++++++++++ docs/changelogs/v21.4.2.10-prestable.md | 176 ++++++++++++++ docs/changelogs/v21.4.3.21-stable.md | 18 ++ docs/changelogs/v21.4.4.30-stable.md | 25 ++ docs/changelogs/v21.4.5.46-stable.md | 21 ++ docs/changelogs/v21.4.6.55-stable.md | 18 ++ docs/changelogs/v21.4.7.3-stable.md | 17 ++ docs/changelogs/v21.5.1.6601-prestable.md | 109 +++++++++ docs/changelogs/v21.5.2.25-prestable.md | 40 ++++ docs/changelogs/v21.5.3.1-prestable.md | 9 + docs/changelogs/v21.5.4.6-prestable.md | 8 + docs/changelogs/v21.5.5.12-stable.md | 11 + docs/changelogs/v21.5.6.6-stable.md | 17 ++ docs/changelogs/v21.5.7.9-stable.md | 42 ++++ docs/changelogs/v21.5.8.21-stable.md | 13 ++ docs/changelogs/v21.5.9.4-stable.md | 6 + docs/changelogs/v21.6.1.6891-prestable.md | 159 +++++++++++++ docs/changelogs/v21.6.2.7-prestable.md | 13 ++ docs/changelogs/v21.6.3.14-stable.md | 14 ++ docs/changelogs/v21.6.4.26-stable.md | 14 ++ docs/changelogs/v21.6.5.37-stable.md | 21 ++ docs/changelogs/v21.6.6.51-stable.md | 19 ++ 
docs/changelogs/v21.6.7.57-stable.md | 8 + docs/changelogs/v21.6.8.62-stable.md | 9 + docs/changelogs/v21.6.9.7-stable.md | 43 ++++ docs/changelogs/v21.7.1.7283-prestable.md | 178 ++++++++++++++ docs/changelogs/v21.7.10.4-stable.md | 19 ++ docs/changelogs/v21.7.11.3-stable.md | 7 + docs/changelogs/v21.7.2.7-stable.md | 17 ++ docs/changelogs/v21.7.3.14-stable.md | 11 + docs/changelogs/v21.7.4.18-stable.md | 10 + docs/changelogs/v21.7.5.29-stable.md | 16 ++ docs/changelogs/v21.7.6.39-stable.md | 14 ++ docs/changelogs/v21.7.7.47-stable.md | 8 + docs/changelogs/v21.7.8.58-stable.md | 12 + docs/changelogs/v21.7.9.7-stable.md | 27 +++ docs/changelogs/v21.8.1.7409-prestable.md | 88 +++++++ docs/changelogs/v21.8.10.19-lts.md | 12 + docs/changelogs/v21.8.11.4-lts.md | 40 ++++ docs/changelogs/v21.8.12.29-lts.md | 24 ++ docs/changelogs/v21.8.13.6-lts.md | 29 +++ docs/changelogs/v21.8.14.5-lts.md | 8 + docs/changelogs/v21.8.15.7-lts.md | 7 + docs/changelogs/v21.8.2.19-prestable.md | 29 +++ docs/changelogs/v21.8.3.44-lts.md | 28 +++ docs/changelogs/v21.8.4.51-lts.md | 11 + docs/changelogs/v21.8.5.7-lts.md | 31 +++ docs/changelogs/v21.8.6.15-lts.md | 25 ++ docs/changelogs/v21.8.7.22-lts.md | 8 + docs/changelogs/v21.8.8.29-lts.md | 13 ++ docs/changelogs/v21.8.9.13-lts.md | 31 +++ docs/changelogs/v21.9.1.8000-prestable.md | 190 +++++++++++++++ docs/changelogs/v21.9.2.17-stable.md | 45 ++++ docs/changelogs/v21.9.3.30-stable.md | 16 ++ docs/changelogs/v21.9.4.35-stable.md | 6 + docs/changelogs/v21.9.5.16-stable.md | 48 ++++ docs/changelogs/v21.9.6.24-stable.md | 57 +++++ 117 files changed, 4459 insertions(+) create mode 100644 docs/changelogs/v21.1.1.5646-prestable.md create mode 100644 docs/changelogs/v21.1.2.15-stable.md create mode 100644 docs/changelogs/v21.1.3.32-stable.md create mode 100644 docs/changelogs/v21.1.4.46-stable.md create mode 100644 docs/changelogs/v21.1.5.4-stable.md create mode 100644 docs/changelogs/v21.1.6.13-stable.md create mode 100644 
docs/changelogs/v21.1.7.1-stable.md create mode 100644 docs/changelogs/v21.1.8.30-stable.md create mode 100644 docs/changelogs/v21.1.9.41-stable.md create mode 100644 docs/changelogs/v21.10.1.8013-prestable.md create mode 100644 docs/changelogs/v21.10.2.15-stable.md create mode 100644 docs/changelogs/v21.10.3.9-stable.md create mode 100644 docs/changelogs/v21.10.4.26-stable.md create mode 100644 docs/changelogs/v21.10.5.3-stable.md create mode 100644 docs/changelogs/v21.10.6.2-stable.md create mode 100644 docs/changelogs/v21.11.1.8636-prestable.md create mode 100644 docs/changelogs/v21.11.10.1-stable.md create mode 100644 docs/changelogs/v21.11.11.1-stable.md create mode 100644 docs/changelogs/v21.11.2.2-stable.md create mode 100644 docs/changelogs/v21.11.3.6-stable.md create mode 100644 docs/changelogs/v21.11.4.14-stable.md create mode 100644 docs/changelogs/v21.11.5.33-stable.md create mode 100644 docs/changelogs/v21.11.6.7-stable.md create mode 100644 docs/changelogs/v21.11.7.9-stable.md create mode 100644 docs/changelogs/v21.11.8.4-stable.md create mode 100644 docs/changelogs/v21.11.9.1-stable.md create mode 100644 docs/changelogs/v21.12.1.9017-prestable.md create mode 100644 docs/changelogs/v21.12.2.17-stable.md create mode 100644 docs/changelogs/v21.12.3.32-stable.md create mode 100644 docs/changelogs/v21.12.4.1-stable.md create mode 100644 docs/changelogs/v21.2.1.5869-prestable.md create mode 100644 docs/changelogs/v21.2.10.48-stable.md create mode 100644 docs/changelogs/v21.2.2.8-stable.md create mode 100644 docs/changelogs/v21.2.3.15-stable.md create mode 100644 docs/changelogs/v21.2.4.6-stable.md create mode 100644 docs/changelogs/v21.2.5.5-stable.md create mode 100644 docs/changelogs/v21.2.6.1-stable.md create mode 100644 docs/changelogs/v21.2.7.11-stable.md create mode 100644 docs/changelogs/v21.2.8.31-stable.md create mode 100644 docs/changelogs/v21.2.9.41-stable.md create mode 100644 docs/changelogs/v21.3.1.6185-prestable.md create mode 100644 
docs/changelogs/v21.3.10.1-lts.md create mode 100644 docs/changelogs/v21.3.11.5-lts.md create mode 100644 docs/changelogs/v21.3.12.2-lts.md create mode 100644 docs/changelogs/v21.3.13.9-lts.md create mode 100644 docs/changelogs/v21.3.14.1-lts.md create mode 100644 docs/changelogs/v21.3.15.4-stable.md create mode 100644 docs/changelogs/v21.3.16.5-lts.md create mode 100644 docs/changelogs/v21.3.17.2-lts.md create mode 100644 docs/changelogs/v21.3.18.4-lts.md create mode 100644 docs/changelogs/v21.3.19.1-lts.md create mode 100644 docs/changelogs/v21.3.2.5-lts.md create mode 100644 docs/changelogs/v21.3.20.1-lts.md create mode 100644 docs/changelogs/v21.3.3.14-lts.md create mode 100644 docs/changelogs/v21.3.4.25-lts.md create mode 100644 docs/changelogs/v21.3.5.42-lts.md create mode 100644 docs/changelogs/v21.3.6.55-lts.md create mode 100644 docs/changelogs/v21.3.7.62-stable.md create mode 100644 docs/changelogs/v21.3.8.76-lts.md create mode 100644 docs/changelogs/v21.3.9.83-lts.md create mode 100644 docs/changelogs/v21.4.1.6422-prestable.md create mode 100644 docs/changelogs/v21.4.2.10-prestable.md create mode 100644 docs/changelogs/v21.4.3.21-stable.md create mode 100644 docs/changelogs/v21.4.4.30-stable.md create mode 100644 docs/changelogs/v21.4.5.46-stable.md create mode 100644 docs/changelogs/v21.4.6.55-stable.md create mode 100644 docs/changelogs/v21.4.7.3-stable.md create mode 100644 docs/changelogs/v21.5.1.6601-prestable.md create mode 100644 docs/changelogs/v21.5.2.25-prestable.md create mode 100644 docs/changelogs/v21.5.3.1-prestable.md create mode 100644 docs/changelogs/v21.5.4.6-prestable.md create mode 100644 docs/changelogs/v21.5.5.12-stable.md create mode 100644 docs/changelogs/v21.5.6.6-stable.md create mode 100644 docs/changelogs/v21.5.7.9-stable.md create mode 100644 docs/changelogs/v21.5.8.21-stable.md create mode 100644 docs/changelogs/v21.5.9.4-stable.md create mode 100644 docs/changelogs/v21.6.1.6891-prestable.md create mode 100644 
docs/changelogs/v21.6.2.7-prestable.md create mode 100644 docs/changelogs/v21.6.3.14-stable.md create mode 100644 docs/changelogs/v21.6.4.26-stable.md create mode 100644 docs/changelogs/v21.6.5.37-stable.md create mode 100644 docs/changelogs/v21.6.6.51-stable.md create mode 100644 docs/changelogs/v21.6.7.57-stable.md create mode 100644 docs/changelogs/v21.6.8.62-stable.md create mode 100644 docs/changelogs/v21.6.9.7-stable.md create mode 100644 docs/changelogs/v21.7.1.7283-prestable.md create mode 100644 docs/changelogs/v21.7.10.4-stable.md create mode 100644 docs/changelogs/v21.7.11.3-stable.md create mode 100644 docs/changelogs/v21.7.2.7-stable.md create mode 100644 docs/changelogs/v21.7.3.14-stable.md create mode 100644 docs/changelogs/v21.7.4.18-stable.md create mode 100644 docs/changelogs/v21.7.5.29-stable.md create mode 100644 docs/changelogs/v21.7.6.39-stable.md create mode 100644 docs/changelogs/v21.7.7.47-stable.md create mode 100644 docs/changelogs/v21.7.8.58-stable.md create mode 100644 docs/changelogs/v21.7.9.7-stable.md create mode 100644 docs/changelogs/v21.8.1.7409-prestable.md create mode 100644 docs/changelogs/v21.8.10.19-lts.md create mode 100644 docs/changelogs/v21.8.11.4-lts.md create mode 100644 docs/changelogs/v21.8.12.29-lts.md create mode 100644 docs/changelogs/v21.8.13.6-lts.md create mode 100644 docs/changelogs/v21.8.14.5-lts.md create mode 100644 docs/changelogs/v21.8.15.7-lts.md create mode 100644 docs/changelogs/v21.8.2.19-prestable.md create mode 100644 docs/changelogs/v21.8.3.44-lts.md create mode 100644 docs/changelogs/v21.8.4.51-lts.md create mode 100644 docs/changelogs/v21.8.5.7-lts.md create mode 100644 docs/changelogs/v21.8.6.15-lts.md create mode 100644 docs/changelogs/v21.8.7.22-lts.md create mode 100644 docs/changelogs/v21.8.8.29-lts.md create mode 100644 docs/changelogs/v21.8.9.13-lts.md create mode 100644 docs/changelogs/v21.9.1.8000-prestable.md create mode 100644 docs/changelogs/v21.9.2.17-stable.md create mode 100644 
docs/changelogs/v21.9.3.30-stable.md create mode 100644 docs/changelogs/v21.9.4.35-stable.md create mode 100644 docs/changelogs/v21.9.5.16-stable.md create mode 100644 docs/changelogs/v21.9.6.24-stable.md diff --git a/docs/changelogs/v21.1.1.5646-prestable.md b/docs/changelogs/v21.1.1.5646-prestable.md new file mode 100644 index 00000000000..ec8abc8a05b --- /dev/null +++ b/docs/changelogs/v21.1.1.5646-prestable.md @@ -0,0 +1,259 @@ +### ClickHouse release v21.1.1.5646-prestable FIXME as compared to v20.12.1.5236-prestable + +#### Backward Incompatible Change +* Prohibit toUnixTimestamp(Date()) (before it just returns UInt16 representation of Date). [#17376](https://github.com/ClickHouse/ClickHouse/pull/17376) ([Azat Khuzhin](https://github.com/azat)). +* Removed aggregate functions `timeSeriesGroupSum`, `timeSeriesGroupRateSum` because a friend of mine said they never worked. This fixes [#16869](https://github.com/ClickHouse/ClickHouse/issues/16869). If you have luck using these functions, write a email to clickhouse-feedback@yandex-team.com. [#17423](https://github.com/ClickHouse/ClickHouse/pull/17423) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The setting `input_format_null_as_default` is enabled by default. [#17525](https://github.com/ClickHouse/ClickHouse/pull/17525) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set `insert_quorum_parallel` to 1 by default. It is significantly more convenient to use than "sequential" quorum inserts. But if you rely to sequential consistency, you should set the setting back to zero. [#17567](https://github.com/ClickHouse/ClickHouse/pull/17567) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Restrict `ALTER MODIFY SETTING` from changing storage settings that affects data parts (`write_final_mark` and `enable_mixed_granularity_parts`). [#18306](https://github.com/ClickHouse/ClickHouse/pull/18306) ([Amos Bird](https://github.com/amosbird)). 
+* Check settings constraints for profile settings from config. Server will fail to start if users.xml contain settings that do not meet corresponding constraints. [#18486](https://github.com/ClickHouse/ClickHouse/pull/18486) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove `sumburConsistentHash` function. This closes [#18120](https://github.com/ClickHouse/ClickHouse/issues/18120). [#18656](https://github.com/ClickHouse/ClickHouse/pull/18656) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `DIV` and `MOD` operators. `DIV` implements integer division. This is needed for MySQL compatibility and for `sqllogictest`. This closes [#18707](https://github.com/ClickHouse/ClickHouse/issues/18707). It may introduce incompatibilities if you are using DIV or MOD as column names or aliases. In case of incompatibility, write aliases after AS keyword or wrap identifiers in quotes (either double quotes or backquotes). [#18760](https://github.com/ClickHouse/ClickHouse/pull/18760) ([Du Chuan](https://github.com/spongedu)). + +#### New Feature +* Use https://github.com/lemire/fast_float to parse floating point numbers. [#16787](https://github.com/ClickHouse/ClickHouse/pull/16787) ([Maksim Kita](https://github.com/kitaisreal)). +* ... [#16819](https://github.com/ClickHouse/ClickHouse/pull/16819) ([pronvis](https://github.com/pronvis)). +* Provide a new aggregator combinator : `-SimpleState` to build SimpleAggregateFunction types via query. It's useful for defining MaterializedView of AggregatingMergeTree engine, and will benefit projections too. [#16853](https://github.com/ClickHouse/ClickHouse/pull/16853) ([Amos Bird](https://github.com/amosbird)). +* Added `mannWitneyUTest`, `studentTTest` and `welchTTest` aggregate functions. Refactored RankCorr a bit. [#16883](https://github.com/ClickHouse/ClickHouse/pull/16883) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add a setting optimize_on_insert. 
When enabled, do the same transformation for INSERTed block of data as if merge was done on this block (e.g. Replacing, Collapsing, Aggregating...). This setting will be enabled as default. This can influence Materialized View and MaterializeMySQL behaviour (see detailed description). This closes [#10683](https://github.com/ClickHouse/ClickHouse/issues/10683). [#16954](https://github.com/ClickHouse/ClickHouse/pull/16954) ([Kruglov Pavel](https://github.com/Avogar)). +* Add ```*.zst``` compression/decompression support. It enables using ```*.zst``` in ```file()``` function and ```Content-encoding: zstd``` in http client. This closes [#16791](https://github.com/ClickHouse/ClickHouse/issues/16791). [#17144](https://github.com/ClickHouse/ClickHouse/pull/17144) ([Abi Palagashvili](https://github.com/fibersel)). +* IP Dictionary supports `IPv4` / `IPv6` types directly. [#17571](https://github.com/ClickHouse/ClickHouse/pull/17571) ([Vladimir C](https://github.com/vdimir)). +* related: [#16176](https://github.com/ClickHouse/ClickHouse/issues/16176) Usage: ``` set limit = 10; set offset = 20; ``` these two settings will affect SELECT query as if it is added like ``` select * from ($your_original_select_query) tmp limit xxx offset xxx; ```. [#17633](https://github.com/ClickHouse/ClickHouse/pull/17633) ([hexiaoting](https://github.com/hexiaoting)). +* Add asynchronous metrics on total amount of rows, bytes and parts in MergeTree tables. This fixes [#11714](https://github.com/ClickHouse/ClickHouse/issues/11714). [#17639](https://github.com/ClickHouse/ClickHouse/pull/17639) ([flynn](https://github.com/ucasfl)). +* Introduce `DETACH TABLE/VIEW ... PERMANENTLY` syntax, so that after restarting the table does not reappear back automatically (only by explicit request). The table can still be attached back using the short syntax ATTACH TABLE. Implements [#5555](https://github.com/ClickHouse/ClickHouse/issues/5555). Fixes [#13850](https://github.com/ClickHouse/ClickHouse/issues/13850). 
[#17642](https://github.com/ClickHouse/ClickHouse/pull/17642) ([filimonov](https://github.com/filimonov)). +* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue. [#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)). +* Add function `encodeXMLComponent` to escape characters to place string into XML text node or attribute. [#17659](https://github.com/ClickHouse/ClickHouse/pull/17659) ([nauta](https://github.com/nautaa)). +* Now clickhouse-client supports opening EDITOR to edit commands. `Alt-Shift-E`. [#17665](https://github.com/ClickHouse/ClickHouse/pull/17665) ([Amos Bird](https://github.com/amosbird)). +* Add support for PROXYv1 protocol to wrap native TCP interface. Allow quotas to be keyed by proxy-forwarded IP address (applied for PROXYv1 address and for X-Forwarded-For from HTTP interface). This is useful when you provide access to ClickHouse only via trusted proxy (e.g. CloudFlare) but want to account user resources by their original IP addresses. This fixes [#17268](https://github.com/ClickHouse/ClickHouse/issues/17268). [#17707](https://github.com/ClickHouse/ClickHouse/pull/17707) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to use custom TLD list in functions `firstSignificantSubdomainCustom`, `cutToFirstSignificantSubdomainCustom`. [#17748](https://github.com/ClickHouse/ClickHouse/pull/17748) ([Azat Khuzhin](https://github.com/azat)). +* Clickhouse-benchmark added query parameter. [#17832](https://github.com/ClickHouse/ClickHouse/pull/17832) ([Maksim Kita](https://github.com/kitaisreal)). +* Extended `OPTIMIZE ... DEDUPLICATE` syntax to allow explicit (or implicit with asterisk/column transformers) list of columns to check for duplicates on. ... [#17846](https://github.com/ClickHouse/ClickHouse/pull/17846) ([Vasily Nemkov](https://github.com/Enmk)). 
+* Add settings `min_compress_block_size` and `max_compress_block_size` to MergeTreeSettings, which have higher priority than the global settings and take effect when they are set. Closes [#13890](https://github.com/ClickHouse/ClickHouse/issues/13890). [#17867](https://github.com/ClickHouse/ClickHouse/pull/17867) ([flynn](https://github.com/ucasfl)). +* Implemented `ATTACH TABLE name FROM 'path/to/data/' (col1 Type1, ...)` query. It creates new table with provided structure and attaches table data from provided directory in `user_files`. [#17903](https://github.com/ClickHouse/ClickHouse/pull/17903) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added `arrayMin`, `arrayMax`, `arrayAvg` aggregation functions. [#18032](https://github.com/ClickHouse/ClickHouse/pull/18032) ([Maksim Kita](https://github.com/kitaisreal)). +* Support `SHOW SETTINGS` statement to show parameters in system.settings. `SHOW CHANGED SETTINGS` and `LIKE/ILIKE` clause are also supported. [#18056](https://github.com/ClickHouse/ClickHouse/pull/18056) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Allow create table as select with columns specification. Example `CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;`. [#18060](https://github.com/ClickHouse/ClickHouse/pull/18060) ([Maksim Kita](https://github.com/kitaisreal)). +* IP Dictionary supports key fetching. Resolves [#18241](https://github.com/ClickHouse/ClickHouse/issues/18241). [#18480](https://github.com/ClickHouse/ClickHouse/pull/18480) ([Vladimir C](https://github.com/vdimir)). +* Implemented `REPLACE TABLE` and `CREATE OR REPLACE TABLE` queries. [#18521](https://github.com/ClickHouse/ClickHouse/pull/18521) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added function `byteSize` to estimate the uncompressed byte size of its arguments in memory. E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9. The function can take multiple arguments. 
The typical application is byteSize(*). [#18579](https://github.com/ClickHouse/ClickHouse/pull/18579) ([Ping Yu](https://github.com/pingyu)). +* Add `query_id` column to `system.part_log` for inserted parts. closes [#10097](https://github.com/ClickHouse/ClickHouse/issues/10097). [#18644](https://github.com/ClickHouse/ClickHouse/pull/18644) ([flynn](https://github.com/ucasfl)). +* Now we have a new storage setting `max_partitions_to_read` for tables in the MergeTree family. It limits the max number of partitions that can be accessed in one query. A user setting `force_max_partition_limit` is also added to enforce this constraint. [#18712](https://github.com/ClickHouse/ClickHouse/pull/18712) ([Amos Bird](https://github.com/amosbird)). +* Function `position` now supports `position(needle in haystack)` syntax for SQL compatibility. This closes [#18701](https://github.com/ClickHouse/ClickHouse/issues/18701). ... [#18779](https://github.com/ClickHouse/ClickHouse/pull/18779) ([Jianmei Zhang](https://github.com/zhangjmruc)). + +#### Performance Improvement +* Slightly improved performance of float parsing. [#16809](https://github.com/ClickHouse/ClickHouse/pull/16809) ([Maksim Kita](https://github.com/kitaisreal)). +* Improved performance of function `repeat`. [#16937](https://github.com/ClickHouse/ClickHouse/pull/16937) ([satanson](https://github.com/satanson)). +* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). +* Now the `-If` combinator is devirtualized, and `count` is properly vectorized. This is for https://github.com/ClickHouse/ClickHouse/pull/17041. [#17043](https://github.com/ClickHouse/ClickHouse/pull/17043) ([Amos Bird](https://github.com/amosbird)). +* Improve performance of AggregatingMergeTree w/ SimpleAggregateFunction(String) in PK. 
[#17109](https://github.com/ClickHouse/ClickHouse/pull/17109) ([Azat Khuzhin](https://github.com/azat)). +* Add `remerge_sort_lowered_memory_bytes_ratio` setting (If memory usage after remerge is not reduced by this ratio, remerge will be disabled). [#17539](https://github.com/ClickHouse/ClickHouse/pull/17539) ([Azat Khuzhin](https://github.com/azat)). +* Speedup `IPv6CIDRToRange` implementation. [#17569](https://github.com/ClickHouse/ClickHouse/pull/17569) ([Vladimir C](https://github.com/vdimir)). +* Using dragonbox algorithm for float to string conversion instead of ryu. [#17831](https://github.com/ClickHouse/ClickHouse/pull/17831) ([Maksim Kita](https://github.com/kitaisreal)). +* Optimized read for StorageMemory. [#18052](https://github.com/ClickHouse/ClickHouse/pull/18052) ([Maksim Kita](https://github.com/kitaisreal)). +* Don't send empty blocks to shards on synchronous INSERT into Distributed table. This closes [#14571](https://github.com/ClickHouse/ClickHouse/issues/14571). [#18775](https://github.com/ClickHouse/ClickHouse/pull/18775) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Replace `PODArray` with `PODArrayWithStackMemory` in `AggregateFunctionWindowFunnelData` to improve `windowFunnel` function performance. [#18817](https://github.com/ClickHouse/ClickHouse/pull/18817) ([flynn](https://github.com/ucasfl)). +* Add `--no-system-table` option for `clickhouse-local` to run without system tables. This avoids initialization of `DateLUT` that may take noticeable amount of time (tens of milliseconds) at startup. [#18899](https://github.com/ClickHouse/ClickHouse/pull/18899) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* New IP Dictionary implementation with lower memory consumption, improved performance for some cases, and fixed bugs. [#16804](https://github.com/ClickHouse/ClickHouse/pull/16804) ([Vladimir C](https://github.com/vdimir)). 
+* Added proper authentication using environment, `~/.aws` and `AssumeRole` for S3 client. [#16856](https://github.com/ClickHouse/ClickHouse/pull/16856) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Support HTTP proxy and HTTPS S3 endpoint configuration. [#16861](https://github.com/ClickHouse/ClickHouse/pull/16861) ([Pavel Kovalenko](https://github.com/Jokser)). +* When `-OrNull` combinator combined `-If`, `-Merge`, `-MergeState`, `-State` combinators, we should put `-OrNull` in front. [#16935](https://github.com/ClickHouse/ClickHouse/pull/16935) ([flynn](https://github.com/ucasfl)). +* - Add configuration for multi zookeeper clusters. [#17070](https://github.com/ClickHouse/ClickHouse/pull/17070) ([fastio](https://github.com/fastio)). +* - Add limit for http redirects in request to S3 storage ('s3_max_redirects'). [#17220](https://github.com/ClickHouse/ClickHouse/pull/17220) ([ianton-ru](https://github.com/ianton-ru)). +* Now set indices will work with `GLOBAL IN`. This fixes [#17232](https://github.com/ClickHouse/ClickHouse/issues/17232) , [#5576](https://github.com/ClickHouse/ClickHouse/issues/5576) . [#17253](https://github.com/ClickHouse/ClickHouse/pull/17253) ([Amos Bird](https://github.com/amosbird)). +* Avoid possible stack overflow in bigint conversion. Big integers are experimental. [#17269](https://github.com/ClickHouse/ClickHouse/pull/17269) ([flynn](https://github.com/ucasfl)). +* Improved minimal Web UI: add history; add sharing support; avoid race condition of different requests; add request in-flight and ready indicators; add favicon; detect Ctrl+Enter if textarea is not in focus. [#17293](https://github.com/ClickHouse/ClickHouse/pull/17293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to use `optimize_move_to_prewhere` optimization with compact parts, when sizes of columns are unknown. [#17330](https://github.com/ClickHouse/ClickHouse/pull/17330) ([Anton Popov](https://github.com/CurtizJ)). 
+* Implement `countSubstrings()`/`countSubstringsCaseInsensitive()`/`countSubstringsCaseInsensitiveUTF8()` (Count the number of substring occurrences). [#17347](https://github.com/ClickHouse/ClickHouse/pull/17347) ([Azat Khuzhin](https://github.com/azat)). +* Add eof check in receiveHello to prevent getting `Attempt to read after eof` exception. [#17365](https://github.com/ClickHouse/ClickHouse/pull/17365) ([Kruglov Pavel](https://github.com/Avogar)). +* Replaced `malloc` with `new`, so that the `MemoryTracker` takes this memory into account. [#17412](https://github.com/ClickHouse/ClickHouse/pull/17412) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix parsing of SETTINGS clause of the INSERT ... SELECT ... SETTINGS query. [#17414](https://github.com/ClickHouse/ClickHouse/pull/17414) ([Azat Khuzhin](https://github.com/azat)). +* Multiple improvements in `./clickhouse install` script. [#17421](https://github.com/ClickHouse/ClickHouse/pull/17421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simplify Sys/V init script. It was not working on Ubuntu 12.04. [#17428](https://github.com/ClickHouse/ClickHouse/pull/17428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now queries coming to the server via MySQL and PostgreSQL protocols have distinctive interface types (which can be seen in the `interface` column of the table `system.query_log`): `4` for MySQL, and `5` for PostgreSQL, instead of formerly used `1` which is now used for the native protocol only. [#17437](https://github.com/ClickHouse/ClickHouse/pull/17437) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow specifying [TTL](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#mergetree-table-ttl) to remove old entries from [system log tables](https://clickhouse.tech/docs/en/operations/system-tables/), using the `<ttl>` attribute in `config.xml`. 
[#17438](https://github.com/ClickHouse/ClickHouse/pull/17438) ([Du Chuan](https://github.com/spongedu)). +* Add functions countMatches/countMatchesCaseInsensitive. [#17459](https://github.com/ClickHouse/ClickHouse/pull/17459) ([Azat Khuzhin](https://github.com/azat)). +* Return dynamic columns like MATERIALIZED / ALIAS for wildcard query when switches `asterisk_include_materialized_columns` and `asterisk_include_alias_columns` are turned on. [#17462](https://github.com/ClickHouse/ClickHouse/pull/17462) ([Ken Chen](https://github.com/chenziliang)). +* Export asynchronous metrics of all servers current threads. It's useful to track down issues like https://github.com/ClickHouse-Extras/poco/pull/28. [#17463](https://github.com/ClickHouse/ClickHouse/pull/17463) ([Amos Bird](https://github.com/amosbird)). +* Export current max ddl entry executed by DDLWorker. It's useful to check if DDLWorker hangs somewhere. [#17464](https://github.com/ClickHouse/ClickHouse/pull/17464) ([Amos Bird](https://github.com/amosbird)). +* Query obfuscator: avoid usage of some SQL keywords for identifier names. [#17526](https://github.com/ClickHouse/ClickHouse/pull/17526) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow query parameters in UPDATE statement of ALTER query. Fixes [#10976](https://github.com/ClickHouse/ClickHouse/issues/10976). [#17563](https://github.com/ClickHouse/ClickHouse/pull/17563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't throw "Too many parts" error in the middle of INSERT query. [#17566](https://github.com/ClickHouse/ClickHouse/pull/17566) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to reload symbols from debug file. This PR also fixes a build-id issue. [#17637](https://github.com/ClickHouse/ClickHouse/pull/17637) ([Amos Bird](https://github.com/amosbird)). +* This fixes [#17457](https://github.com/ClickHouse/ClickHouse/issues/17457). 
[#17641](https://github.com/ClickHouse/ClickHouse/pull/17641) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Switch to patched version of RocksDB (from ClickHouse-Extras). [#17643](https://github.com/ClickHouse/ClickHouse/pull/17643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not ignore server memory limits during Buffer flush. [#17646](https://github.com/ClickHouse/ClickHouse/pull/17646) ([Azat Khuzhin](https://github.com/azat)). +* Removed exception throwing at table initialization if there was no connection (it will be reconnecting in the background). [#17709](https://github.com/ClickHouse/ClickHouse/pull/17709) ([Kseniia Sumarokova](https://github.com/kssenii)). +* system.query_log now has extensive information to achieve better query analysis. [#17726](https://github.com/ClickHouse/ClickHouse/pull/17726) ([Amos Bird](https://github.com/amosbird)). +* Check system log tables' engine definition grammatically to prevent some configuration errors. Notes that this grammar check is not semantical, that means such mistakes as non-existent columns / expression functions would be not found out util the table is created. [#17739](https://github.com/ClickHouse/ClickHouse/pull/17739) ([Du Chuan](https://github.com/spongedu)). +* Improves the path concatenation of zookeeper paths inside DDLWorker. [#17767](https://github.com/ClickHouse/ClickHouse/pull/17767) ([Bharat Nallan](https://github.com/bharatnc)). +* Improvement of Web UI: do not add empty query to browser history. [#17770](https://github.com/ClickHouse/ClickHouse/pull/17770) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Let the possibility to override timeout value for running script using the ClickHouse docker image. [#17818](https://github.com/ClickHouse/ClickHouse/pull/17818) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Add metrics(Parts, PartsActive, PartsInactive) for part number in MergeTree in clickhouse. 
[#17838](https://github.com/ClickHouse/ClickHouse/pull/17838) ([徐炘](https://github.com/weeds085490)). +* Add diagnostic information when two merge tables try to read each other's data. [#17854](https://github.com/ClickHouse/ClickHouse/pull/17854) ([徐炘](https://github.com/weeds085490)). +* Hints for column names. [#17112](https://github.com/ClickHouse/ClickHouse/issues/17112). [#17857](https://github.com/ClickHouse/ClickHouse/pull/17857) ([fastio](https://github.com/fastio)). +* Support for async tasks in `PipelineExecutor`. Initial support of async sockets for remote queries. [#17868](https://github.com/ClickHouse/ClickHouse/pull/17868) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `allow_introspection_functions=0` prohibits usage of introspection functions but doesn't prohibit giving grants for them anymore (the grantee will need to set `allow_introspection_functions=1` for himself to be able to use that grant). Similarly `allow_ddl=0` prohibits usage of DDL commands but doesn't prohibit giving grants for them anymore. [#17908](https://github.com/ClickHouse/ClickHouse/pull/17908) ([Vitaly Baranov](https://github.com/vitlibar)). +* Ability to set custom metadata when putting S3 object. [#17909](https://github.com/ClickHouse/ClickHouse/pull/17909) ([Pavel Kovalenko](https://github.com/Jokser)). +* Adaptive choose of single/multi part upload in WriteBufferFromS3. Single part upload is controlled by a new setting 'max_single_part_upload_size'. [#17934](https://github.com/ClickHouse/ClickHouse/pull/17934) ([Pavel Kovalenko](https://github.com/Jokser)). +* Forcibly removing empty or bad metadata files from filesystem for DiskS3. S3 is an experimental feature. [#17935](https://github.com/ClickHouse/ClickHouse/pull/17935) ([Pavel Kovalenko](https://github.com/Jokser)). +* Now the table function `merge()` requires the current user to have the `SELECT` privilege on each table it receives data from. 
This PR fixes [#16964](https://github.com/ClickHouse/ClickHouse/issues/16964). [#17983](https://github.com/ClickHouse/ClickHouse/pull/17983) ([Vitaly Baranov](https://github.com/vitlibar)). +* Decrease log verbosity of the events when the client drops the connection from WARNING to INFORMATION. [#18005](https://github.com/ClickHouse/ClickHouse/pull/18005) ([filimonov](https://github.com/filimonov)). +* Fix clickhouse-client rendering issue when the size of terminal window changes. [#18009](https://github.com/ClickHouse/ClickHouse/pull/18009) ([Amos Bird](https://github.com/amosbird)). +* Temporary tables are visible in the system tables `system.tables` and `system.columns` now only in those sessions where they have been created. The internal database `_temporary_and_external_tables` is now hidden in those system tables; temporary tables are shown as tables with empty database with the `is_temporary` flag set instead. [#18014](https://github.com/ClickHouse/ClickHouse/pull/18014) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix dead list watches removal for TestKeeperStorage. [#18065](https://github.com/ClickHouse/ClickHouse/pull/18065) ([alesapin](https://github.com/alesapin)). +* Support `SHOW CREATE VIEW name` syntax like [MySQL](https://dev.mysql.com/doc/refman/5.7/en/show-create-view.html). [#18095](https://github.com/ClickHouse/ClickHouse/pull/18095) ([Du Chuan](https://github.com/spongedu)). +* Now the table function `merge()` requires the current user to have the `SELECT` privilege on each table it receives data from. This PR fixes [#16964](https://github.com/ClickHouse/ClickHouse/issues/16964). [#18104](https://github.com/ClickHouse/ClickHouse/pull/18104) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `disk` to Set and Join storage settings. [#18112](https://github.com/ClickHouse/ClickHouse/pull/18112) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* `EXPLAIN AST` now supports queries other than `SELECT`. 
[#18136](https://github.com/ClickHouse/ClickHouse/pull/18136) ([李扬](https://github.com/taiyang-li)). +* All queries of type `Decimal * Float` or vice versa are allowed, including aggregate ones (e.g. `SELECT sum(decimal_field * 1.1)` or `SELECT dec_col * float_col`), the result type is Float32 or Float64. [#18145](https://github.com/ClickHouse/ClickHouse/pull/18145) ([Mike Kot](https://github.com/myrrc)). +* Array aggregation `arrayMin`, `arrayMax`, `arraySum`, `arrayAvg` support for `Int128`, `Int256`, `UInt256`. [#18147](https://github.com/ClickHouse/ClickHouse/pull/18147) ([Maksim Kita](https://github.com/kitaisreal)). +* Better hints for `SHOW ...` query syntax. [#18183](https://github.com/ClickHouse/ClickHouse/pull/18183) ([Du Chuan](https://github.com/spongedu)). +* Now clickhouse-install could work on Mac. The problem was that there is no procfs on this platform. [#18201](https://github.com/ClickHouse/ClickHouse/pull/18201) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Make better adaptive granularity calculation for merge tree wide parts. [#18223](https://github.com/ClickHouse/ClickHouse/pull/18223) ([alesapin](https://github.com/alesapin)). +* Allow to parse Array fields from CSV if it is represented as a string containing array that was serialized as nested CSV. Example: `"[""Hello"", ""world"", ""42"""" TV""]"` will parse as `['Hello', 'world', '42" TV']`. Allow to parse array in CSV in a string without enclosing braces. Example: `"'Hello', 'world', '42"" TV'"` will parse as `['Hello', 'world', '42" TV']`. [#18271](https://github.com/ClickHouse/ClickHouse/pull/18271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In case of unexpected exceptions automatically restart background thread which is responsible for execution of distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)). 
+* Add a new setting `insert_distributed_one_random_shard = 1` to allow insertion into multi-sharded distributed table without any distributed key. [#18294](https://github.com/ClickHouse/ClickHouse/pull/18294) ([Amos Bird](https://github.com/amosbird)). +* related to [#18133](https://github.com/ClickHouse/ClickHouse/issues/18133). [#18309](https://github.com/ClickHouse/ClickHouse/pull/18309) ([hexiaoting](https://github.com/hexiaoting)). +* Fix potential server crash during Buffer rollback (that is impossible in current ClickHouse version). [#18329](https://github.com/ClickHouse/ClickHouse/pull/18329) ([Azat Khuzhin](https://github.com/azat)). +* Support builtin function `isIPv4String` && `isIPv6String` like [MySQL](https://github.com/ClickHouse/ClickHouse/compare/master...spongedu:support_is_ipv4?expand=1). [#18349](https://github.com/ClickHouse/ClickHouse/pull/18349) ([Du Chuan](https://github.com/spongedu)). +* Add ability to modify primary and partition key column type from `LowCardinality(Type)` to `Type` and vice versa. Also add an ability to modify primary key column type from `EnumX ` to `IntX` type. Fixes [#5604](https://github.com/ClickHouse/ClickHouse/issues/5604). [#18362](https://github.com/ClickHouse/ClickHouse/pull/18362) ([alesapin](https://github.com/alesapin)). +* Fix bug: no newline after exception message in some tools. [#18444](https://github.com/ClickHouse/ClickHouse/pull/18444) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* support syntax `EXISTS DATABASE name`. [#18458](https://github.com/ClickHouse/ClickHouse/pull/18458) ([Du Chuan](https://github.com/spongedu)). +* Fixed assertion error inside allocator in case when last argument of function bar is NaN. Now simple ClickHouse's exception is being thrown. This fixes [#17876](https://github.com/ClickHouse/ClickHouse/issues/17876). [#18520](https://github.com/ClickHouse/ClickHouse/pull/18520) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* PODArray: Avoid call to memcpy with (nullptr, 0) arguments (Fix UBSan report). This fixes [#18525](https://github.com/ClickHouse/ClickHouse/issues/18525). [#18526](https://github.com/ClickHouse/ClickHouse/pull/18526) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix SimpleAggregateFunction in SummingMergeTree. Now it works like AggregateFunction. In previous versions values were summed together regardless to the aggregate function. This fixes [#18564](https://github.com/ClickHouse/ClickHouse/issues/18564) . [#8052](https://github.com/ClickHouse/ClickHouse/issues/8052). [#18637](https://github.com/ClickHouse/ClickHouse/pull/18637) ([Amos Bird](https://github.com/amosbird)). +* Another fix of using SimpleAggregateFunction in SummingMergeTree. This fixes [#18676](https://github.com/ClickHouse/ClickHouse/issues/18676) . [#18677](https://github.com/ClickHouse/ClickHouse/pull/18677) ([Amos Bird](https://github.com/amosbird)). +* Allow column transformer `EXCEPT` to accept a string as regular expression matcher. This resolves [#18685](https://github.com/ClickHouse/ClickHouse/issues/18685) . [#18699](https://github.com/ClickHouse/ClickHouse/pull/18699) ([Amos Bird](https://github.com/amosbird)). +* Apply `ALTER TABLE ON CLUSTER MODIFY SETTING ...` to all replicas. Because we don't replicate such alter commands. [#18789](https://github.com/ClickHouse/ClickHouse/pull/18789) ([Amos Bird](https://github.com/amosbird)). +* Expand macros in the zk path when executing fetchPartition. [#18839](https://github.com/ClickHouse/ClickHouse/pull/18839) ([fastio](https://github.com/fastio)). +* `SYSTEM KILL` command started to work in Docker. This closes [#18847](https://github.com/ClickHouse/ClickHouse/issues/18847). [#18848](https://github.com/ClickHouse/ClickHouse/pull/18848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Two new settings (by analogy with MergeTree family) has been added: - `fsync_after_insert` - Do fsync for every inserted. 
Will decreases performance of inserts. - `fsync_directories` - Do fsync for temporary directory (that is used for async INSERT only) after all operations (writes, renames, etc.). [#18864](https://github.com/ClickHouse/ClickHouse/pull/18864) ([Azat Khuzhin](https://github.com/azat)). +* change the sorting key of events_list from timestamp to (timestamp, event_index). [#18884](https://github.com/ClickHouse/ClickHouse/pull/18884) ([Fuwang Hu](https://github.com/fuwhu)). +* Aliases declared in `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ... WHERE alias = ...` may use index now. [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix +* Fix bug when clickhouse-server doesn't send `close` request to ZooKeeper server. [#16837](https://github.com/ClickHouse/ClickHouse/pull/16837) ([alesapin](https://github.com/alesapin)). +* TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)). +* Fix incorrect comparison of types `DateTime64` with different scales. Fixes [#16655](https://github.com/ClickHouse/ClickHouse/issues/16655) ... [#16952](https://github.com/ClickHouse/ClickHouse/pull/16952) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Merge(Distributed()) with JOIN. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix optimize_distributed_group_by_sharding_key for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)). +* Bug fix for function fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). +* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)). +* Fixed possible not-working mutations for parts stored on S3 disk. [#17227](https://github.com/ClickHouse/ClickHouse/pull/17227) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). When clickhouse-copier handles a non-partitioned table, it throws a segfault error. [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). +* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Fix possible `Unexpected packet Data received from client` error for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)). +* Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Fixes [#16835](https://github.com/ClickHouse/ClickHouse/issues/16835): try to fix header mismatch with MySQL SHOW statement. 
[#17366](https://github.com/ClickHouse/ClickHouse/pull/17366) ([Winter Zhang](https://github.com/zhang2014)). +* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed high CPU usage in background tasks of *MergeTree tables. [#17416](https://github.com/ClickHouse/ClickHouse/pull/17416) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid server abnormal termination in case of too low memory limits (`max_memory_usage=1`/`max_untracked_memory=1`). [#17453](https://github.com/ClickHouse/ClickHouse/pull/17453) ([Azat Khuzhin](https://github.com/azat)). +* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Fix the issue when server can stop accepting connections in very rare cases. 
[#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). +* It might be determined incorrectly if cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Fixed segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). +* fix incorrect initialize `max_compress_block_size` of MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasfl)). +* Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix max_distributed_connections (affects `prefer_localhost_replica=1` and `max_threads!=max_distributed_connections`). [#17848](https://github.com/ClickHouse/ClickHouse/pull/17848) ([Azat Khuzhin](https://github.com/azat)). +* Trivial query optimization was producing wrong result if query contains ARRAY JOIN (so query is actually non trivial). [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)). +* Fix comparison of `DateTime64` and `Date`. Fixes [#13804](https://github.com/ClickHouse/ClickHouse/issues/13804) and [#11222](https://github.com/ClickHouse/ClickHouse/issues/11222). ... [#17895](https://github.com/ClickHouse/ClickHouse/pull/17895) ([Vasily Nemkov](https://github.com/Enmk)). +* When server log rotation was configured using `logger.size` parameter with numeric value larger than 2^32, the logs were not rotated properly. This is fixed. [#17905](https://github.com/ClickHouse/ClickHouse/pull/17905) ([Alexander Kuzmenkov](https://github.com/akuzm)). 
+* fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) support convert MySQL prefix index for MaterializeMySQL CC: @tavplubix. [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)). +* Fix comparison of `DateTime64` and `Date`. Fixes [#13804](https://github.com/ClickHouse/ClickHouse/issues/13804) and [#11222](https://github.com/ClickHouse/ClickHouse/issues/11222). ... [#18050](https://github.com/ClickHouse/ClickHouse/pull/18050) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix corruption in librdkafka snappy decompression (was a problem only for gcc10 builds, but official builds uses clang already, so at least recent official releases are not affected). [#18053](https://github.com/ClickHouse/ClickHouse/pull/18053) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). +* Fix `Unknown setting profile` error on attempt to set settings profile. [#18167](https://github.com/ClickHouse/ClickHouse/pull/18167) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix inserting a row with default value in case of parsing error in the last column. Fixes [#17712](https://github.com/ClickHouse/ClickHouse/issues/17712). [#18182](https://github.com/ClickHouse/ClickHouse/pull/18182) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) fix unique key convert crash in MaterializeMySQL database engine. 
[#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)). +* Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)). +* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). +* - Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; - Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). 
[#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible race condition in concurrent usage of `Set` or `Join` tables and selects from `system.tables`. [#18385](https://github.com/ClickHouse/ClickHouse/pull/18385) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix -SimpleState combinator generating incompatible argument type and return type. [#18404](https://github.com/ClickHouse/ClickHouse/pull/18404) ([Amos Bird](https://github.com/amosbird)). +* Fix the unexpected behaviour of `SHOW TABLES`. [#18431](https://github.com/ClickHouse/ClickHouse/pull/18431) ([fastio](https://github.com/fastio)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Proper support for 12AM in `parseDateTimeBestEffort` function. This fixes [#18402](https://github.com/ClickHouse/ClickHouse/issues/18402). [#18449](https://github.com/ClickHouse/ClickHouse/pull/18449) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Fix bug which may lead to `ALTER` queries hanging after corresponding mutation kill. Found by thread fuzzer.
[#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add FixedString Data type support. I'll get this exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)" when replicating data from MySQL to ClickHouse. This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450) Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). +* Fix previous bug when date overflow with different values. Strict Date value limit to "2106-02-07", cast date > "2106-02-07" to value 0. [#18565](https://github.com/ClickHouse/ClickHouse/pull/18565) ([hexiaoting](https://github.com/hexiaoting)). +* Fix removing of empty parts in `ReplicatedMergeTree` tables, created with old syntax. Fixes [#18582](https://github.com/ClickHouse/ClickHouse/issues/18582). [#18614](https://github.com/ClickHouse/ClickHouse/pull/18614) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). +* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. 
This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* - Fix never worked `fsync_part_directory`/`fsync_after_insert`/`in_memory_parts_insert_sync`. [#18845](https://github.com/ClickHouse/ClickHouse/pull/18845) ([Azat Khuzhin](https://github.com/azat)). +* Fix use after free bug in rocksdb. [#18862](https://github.com/ClickHouse/ClickHouse/pull/18862) ([sundyli](https://github.com/sundy-li)). +* Queries for external databases (MySQL, ODBC, JDBC) were incorrectly rewritten if there was an expression in form of `x IN table`. This fixes [#9756](https://github.com/ClickHouse/ClickHouse/issues/9756). [#18876](https://github.com/ClickHouse/ClickHouse/pull/18876) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). + +#### Build/Testing/Packaging Improvement +* Add some test for MaterializeMySQL. e.g. network partition, MySQL kill sync thread... [#16806](https://github.com/ClickHouse/ClickHouse/pull/16806) ([TCeason](https://github.com/TCeason)). +* Now ClickHouse can pretend to be a fake ZooKeeper. Currently, storage implementation is just stored in-memory hash-table, and server partially support ZooKeeper protocol. 
[#16877](https://github.com/ClickHouse/ClickHouse/pull/16877) ([alesapin](https://github.com/alesapin)). +* * Added RBAC tests for `ATTACH`, `CREATE`, `DROP`, and `DETACH`. [#16977](https://github.com/ClickHouse/ClickHouse/pull/16977) ([MyroTk](https://github.com/MyroTk)). +* `PODArray` does not initialize "new" elements when resizing, unlike `std::vector`. This probably fixes [this failure](https://clickhouse-test-reports.s3.yandex.net/17309/065cd002578f2e8228f12a2744bd40c970065e0c/stress_test_(memory)/stderr.log) from [#17309](https://github.com/ClickHouse/ClickHouse/issues/17309). [#17344](https://github.com/ClickHouse/ClickHouse/pull/17344) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* There was an uninitialized variable in the code of Copier. [#17363](https://github.com/ClickHouse/ClickHouse/pull/17363) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Send info about official build, memory, cpu and free disk space to Sentry if it is enabled. Sentry is opt-in feature to help ClickHouse developers. This closes [#17279](https://github.com/ClickHouse/ClickHouse/issues/17279). [#17543](https://github.com/ClickHouse/ClickHouse/pull/17543) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add an integration test: MySQL server killed while insert for MaterializeMySQL Engine. [#17614](https://github.com/ClickHouse/ClickHouse/pull/17614) ([TCeason](https://github.com/TCeason)). +* Add an integration test: ClickHouse killed while insert for MaterializeMySQL ENGINE. [#17622](https://github.com/ClickHouse/ClickHouse/pull/17622) ([TCeason](https://github.com/TCeason)). +* - RBAC testflows tests for SHOW, TRUNCATE, KILL, and OPTIMIZE. - Updates to old tests. - Resolved comments from #https://github.com/ClickHouse/ClickHouse/pull/16977. [#17657](https://github.com/ClickHouse/ClickHouse/pull/17657) ([MyroTk](https://github.com/MyroTk)). +* Now we use the fresh docker daemon version in integration tests. 
[#17671](https://github.com/ClickHouse/ClickHouse/pull/17671) ([alesapin](https://github.com/alesapin)). +* - Testflows tests for RBAC [ACCESS MANAGEMENT](https://clickhouse.tech/docs/en/sql-reference/statements/grant/#grant-access-management) privileges. [#17804](https://github.com/ClickHouse/ClickHouse/pull/17804) ([MyroTk](https://github.com/MyroTk)). +* Updating TestFlows README.md to include "How To Debug Why Test Failed" section. [#17808](https://github.com/ClickHouse/ClickHouse/pull/17808) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add our own CMakeList for dragonbox which was added in [#17831](https://github.com/ClickHouse/ClickHouse/issues/17831). [#17869](https://github.com/ClickHouse/ClickHouse/pull/17869) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Enable Pytest framework for stateless tests. [#17902](https://github.com/ClickHouse/ClickHouse/pull/17902) ([Ivan](https://github.com/abyss7)). +* Adjusting timeouts a bit, in the good hope that it will prevent flakiness of the test. [#18000](https://github.com/ClickHouse/ClickHouse/pull/18000) ([filimonov](https://github.com/filimonov)). +* Now, `clickhouse-test` DROP/CREATE databases with a timeout. [#18098](https://github.com/ClickHouse/ClickHouse/pull/18098) ([alesapin](https://github.com/alesapin)). +* Change OpenSSL to BoringSSL. It allows to avoid issues with sanitizers. This fixes [#12490](https://github.com/ClickHouse/ClickHouse/issues/12490). This fixes [#17502](https://github.com/ClickHouse/ClickHouse/issues/17502). This fixes [#12952](https://github.com/ClickHouse/ClickHouse/issues/12952). [#18129](https://github.com/ClickHouse/ClickHouse/pull/18129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix usage of uninitialized value in function toModifiedJulianDayOrNull, reported by MSan. Was discovered [here](https://github.com/ClickHouse/ClickHouse/pull/17726#issuecomment-744050500). 
[#18172](https://github.com/ClickHouse/ClickHouse/pull/18172) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update `anchore/scan-action@main` workflow action (was moved from master). [#18192](https://github.com/ClickHouse/ClickHouse/pull/18192) ([Stig Bakken](https://github.com/stigsb)). +* Do not use non thread-safe function `strerror`. [#18204](https://github.com/ClickHouse/ClickHouse/pull/18204) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Updating TestFlows version to the latest 1.6.72 - Re-generating requirements.py. [#18208](https://github.com/ClickHouse/ClickHouse/pull/18208) ([vzakaznikov](https://github.com/vzakaznikov)). +* Merging requirements for AES encryption functions. Updating aes_encryption tests to use new requirements. Updating TestFlows version to 1.6.72. [#18221](https://github.com/ClickHouse/ClickHouse/pull/18221) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enable Thread Fuzzer for stateless tests flaky check. [#18299](https://github.com/ClickHouse/ClickHouse/pull/18299) ([alesapin](https://github.com/alesapin)). +* Check for leftovers of conflict markers in docs. [#18332](https://github.com/ClickHouse/ClickHouse/pull/18332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix codespell warnings. Split style checks into separate parts. Update style checks docker image. [#18463](https://github.com/ClickHouse/ClickHouse/pull/18463) ([Ilya Yatsishin](https://github.com/qoega)). +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). +* Fix shellcheck errors in style check. [#18566](https://github.com/ClickHouse/ClickHouse/pull/18566) ([Ilya Yatsishin](https://github.com/qoega)). +* TestFlows: fixes to LDAP tests that fail due to slow test execution. [#18790](https://github.com/ClickHouse/ClickHouse/pull/18790) ([vzakaznikov](https://github.com/vzakaznikov)). 
+* Generate build id when ClickHouse is linked with `lld`. It appeared that `lld` does not generate it by default on my machine. Build id is used for crash reports and introspection. [#18808](https://github.com/ClickHouse/ClickHouse/pull/18808) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add simple integrity check for ClickHouse binary. It allows to detect corruption due to faulty hardware (bit rot on storage media or bit flips in RAM). [#18811](https://github.com/ClickHouse/ClickHouse/pull/18811) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Removed the -finline-hint-functions flag not present in GCC. [#18846](https://github.com/ClickHouse/ClickHouse/pull/18846) ([Mike Kot](https://github.com/myrrc)). +* Add `SYSTEM SUSPEND` command for fault injection. It can be used to facilitate failover tests. This closes [#15979](https://github.com/ClickHouse/ClickHouse/issues/15979). [#18850](https://github.com/ClickHouse/ClickHouse/pull/18850) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.20 to 0.5.0 in /docs/tools'. [#17351](https://github.com/ClickHouse/ClickHouse/pull/17351) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Revert "Attempt to fix Stress test (MSan)"'. [#17372](https://github.com/ClickHouse/ClickHouse/pull/17372) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Revert "Bump mkdocs-macros-plugin from 0.4.20 to 0.5.0 in /docs/tools"'. [#17405](https://github.com/ClickHouse/ClickHouse/pull/17405) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Update README.md'. [#17596](https://github.com/ClickHouse/ClickHouse/pull/17596) ([Robert Hodges](https://github.com/hodgesrm)). +* NO CL ENTRY: 'Revert "Fix index granularity calculation on block borders"'.
[#17918](https://github.com/ClickHouse/ClickHouse/pull/17918) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Date vs DateTime64 comparison"'. [#17985](https://github.com/ClickHouse/ClickHouse/pull/17985) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Исправил опечатку в названии ОС RedHad->RedHat'. [#18028](https://github.com/ClickHouse/ClickHouse/pull/18028) ([Ed Rakhmankulov](https://github.com/Erixonich)). +* NO CL ENTRY: 'Revert "Fix access rights required for the merge() table function."'. [#18103](https://github.com/ClickHouse/ClickHouse/pull/18103) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add some extra tests to copier"'. [#18636](https://github.com/ClickHouse/ClickHouse/pull/18636) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Fix typo in array functions' documentation'. [#18792](https://github.com/ClickHouse/ClickHouse/pull/18792) ([Bertrand Junqua](https://github.com/Bertrand31)). +* NO CL ENTRY: 'Revert "Add metrics for part number in MergeTree in ClickHouse"'. [#18834](https://github.com/ClickHouse/ClickHouse/pull/18834) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Fixed typo in metrics.md'. [#18920](https://github.com/ClickHouse/ClickHouse/pull/18920) ([Mark Frost](https://github.com/frostmark)). + diff --git a/docs/changelogs/v21.1.2.15-stable.md b/docs/changelogs/v21.1.2.15-stable.md new file mode 100644 index 00000000000..205794b94c2 --- /dev/null +++ b/docs/changelogs/v21.1.2.15-stable.md @@ -0,0 +1,28 @@ +### ClickHouse release v21.1.2.15-stable FIXME as compared to v21.1.1.5646-prestable + +#### Improvement +* Backported in [#19148](https://github.com/ClickHouse/ClickHouse/issues/19148): Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. 
[#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix +* Backported in [#19203](https://github.com/ClickHouse/ClickHouse/issues/19203): `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#19161](https://github.com/ClickHouse/ClickHouse/issues/19161): Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18948](https://github.com/ClickHouse/ClickHouse/issues/18948): Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18934](https://github.com/ClickHouse/ClickHouse/issues/18934): Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). +* Backported in [#19115](https://github.com/ClickHouse/ClickHouse/issues/19115): Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). 
+* Backported in [#18966](https://github.com/ClickHouse/ClickHouse/issues/18966): Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* Backported in [#19005](https://github.com/ClickHouse/ClickHouse/issues/19005): Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19009](https://github.com/ClickHouse/ClickHouse/issues/19009): Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Backported in [#19193](https://github.com/ClickHouse/ClickHouse/issues/19193): Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19004](https://github.com/ClickHouse/ClickHouse/issues/19004): Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19045](https://github.com/ClickHouse/ClickHouse/issues/19045): Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). 
This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19070](https://github.com/ClickHouse/ClickHouse/issues/19070): Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#19053](https://github.com/ClickHouse/ClickHouse/issues/19053): Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19081](https://github.com/ClickHouse/ClickHouse/issues/19081): Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19046](https://github.com/ClickHouse/ClickHouse/issues/19046): Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19126](https://github.com/ClickHouse/ClickHouse/issues/19126): Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#19121](https://github.com/ClickHouse/ClickHouse/issues/19121): Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19150](https://github.com/ClickHouse/ClickHouse/issues/19150): Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Backported in [#19177](https://github.com/ClickHouse/ClickHouse/issues/19177): Fix bug in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). +* Backported in [#19179](https://github.com/ClickHouse/ClickHouse/issues/19179): Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19178](https://github.com/ClickHouse/ClickHouse/issues/19178): - Split RemoteQueryExecutorReadContext into module part - Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)). 
+ diff --git a/docs/changelogs/v21.1.3.32-stable.md b/docs/changelogs/v21.1.3.32-stable.md new file mode 100644 index 00000000000..ea4c9fd0fe6 --- /dev/null +++ b/docs/changelogs/v21.1.3.32-stable.md @@ -0,0 +1,31 @@ +### ClickHouse release v21.1.3.32-stable FIXME as compared to v21.1.2.15-stable + +#### Bug Fix +* Backported in [#19654](https://github.com/ClickHouse/ClickHouse/issues/19654): fix data type convert issue for mysql engine ... [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). +* Backported in [#19423](https://github.com/ClickHouse/ClickHouse/issues/19423): Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19458](https://github.com/ClickHouse/ClickHouse/issues/19458): Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19271](https://github.com/ClickHouse/ClickHouse/issues/19271): Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Backported in [#19425](https://github.com/ClickHouse/ClickHouse/issues/19425): Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#19456](https://github.com/ClickHouse/ClickHouse/issues/19456): Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19554](https://github.com/ClickHouse/ClickHouse/issues/19554): Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19506](https://github.com/ClickHouse/ClickHouse/issues/19506): Added `cast`, `accurateCast`, `accurateCastOrNull` performance tests. [#19354](https://github.com/ClickHouse/ClickHouse/pull/19354) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19471](https://github.com/ClickHouse/ClickHouse/issues/19471): - Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#19438](https://github.com/ClickHouse/ClickHouse/issues/19438): Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19485](https://github.com/ClickHouse/ClickHouse/issues/19485): Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). 
[#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19615](https://github.com/ClickHouse/ClickHouse/issues/19615): Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19507](https://github.com/ClickHouse/ClickHouse/issues/19507): Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19539](https://github.com/ClickHouse/ClickHouse/issues/19539): Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19640](https://github.com/ClickHouse/ClickHouse/issues/19640): Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19636](https://github.com/ClickHouse/ClickHouse/issues/19636): `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#19650](https://github.com/ClickHouse/ClickHouse/issues/19650): Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19738](https://github.com/ClickHouse/ClickHouse/issues/19738): Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19700](https://github.com/ClickHouse/ClickHouse/issues/19700): Some functions with big integers may cause segfault. Big integers are an experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19742](https://github.com/ClickHouse/ClickHouse/issues/19742): Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19782](https://github.com/ClickHouse/ClickHouse/issues/19782): Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19870](https://github.com/ClickHouse/ClickHouse/issues/19870): Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#19814](https://github.com/ClickHouse/ClickHouse/issues/19814): In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19835](https://github.com/ClickHouse/ClickHouse/issues/19835): Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#19910](https://github.com/ClickHouse/ClickHouse/issues/19910): Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#19938](https://github.com/ClickHouse/ClickHouse/issues/19938): Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19934](https://github.com/ClickHouse/ClickHouse/issues/19934): BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). 
+ diff --git a/docs/changelogs/v21.1.4.46-stable.md b/docs/changelogs/v21.1.4.46-stable.md new file mode 100644 index 00000000000..3033c5edd21 --- /dev/null +++ b/docs/changelogs/v21.1.4.46-stable.md @@ -0,0 +1,22 @@ +### ClickHouse release v21.1.4.46-stable FIXME as compared to v21.1.3.32-stable + +#### Bug Fix +* Backported in [#19983](https://github.com/ClickHouse/ClickHouse/issues/19983): Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Backported in [#20238](https://github.com/ClickHouse/ClickHouse/issues/20238): Fix a bug that moving pieces to destination table may fail in case of launching multiple clickhouse-copiers. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)). +* Backported in [#20074](https://github.com/ClickHouse/ClickHouse/issues/20074): Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#19997](https://github.com/ClickHouse/ClickHouse/issues/19997): - Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). +* Backported in [#20122](https://github.com/ClickHouse/ClickHouse/issues/20122): MaterializeMySQL: Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). +* Backported in [#20297](https://github.com/ClickHouse/ClickHouse/issues/20297): * Bugfix in StorageJoin. 
[#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#20390](https://github.com/ClickHouse/ClickHouse/issues/20390): The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#20148](https://github.com/ClickHouse/ClickHouse/issues/20148): Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). +* Backported in [#20327](https://github.com/ClickHouse/ClickHouse/issues/20327): Fix rare server crash on config reload during the shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)). +* Backported in [#20331](https://github.com/ClickHouse/ClickHouse/issues/20331): Restrict to `DROP` or `RENAME` version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). +* Backported in [#20363](https://github.com/ClickHouse/ClickHouse/issues/20363): Fix too often retries of failed background tasks for `ReplicatedMergeTree` table engines family. This could lead to too verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#20378](https://github.com/ClickHouse/ClickHouse/issues/20378): Fix incorrect result of binary operations between two constant decimals of different scale. Fixes [#20283](https://github.com/ClickHouse/ClickHouse/issues/20283). [#20339](https://github.com/ClickHouse/ClickHouse/pull/20339) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#20375](https://github.com/ClickHouse/ClickHouse/issues/20375): Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20359](https://github.com/ClickHouse/ClickHouse/issues/20359): Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#20224](https://github.com/ClickHouse/ClickHouse/issues/20224) to 21.1: Fix access control manager destruction order"'. [#20395](https://github.com/ClickHouse/ClickHouse/pull/20395) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v21.1.5.4-stable.md b/docs/changelogs/v21.1.5.4-stable.md new file mode 100644 index 00000000000..c67724a4512 --- /dev/null +++ b/docs/changelogs/v21.1.5.4-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.1.5.4-stable FIXME as compared to v21.1.4.46-stable + +#### Bug Fix +* Backported in [#20678](https://github.com/ClickHouse/ClickHouse/issues/20678): Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20646](https://github.com/ClickHouse/ClickHouse/issues/20646): The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). 
[#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20507](https://github.com/ClickHouse/ClickHouse/issues/20507): Fixed the behavior when in case of broken JSON we tried to read the whole file into memory which leads to exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#20618](https://github.com/ClickHouse/ClickHouse/issues/20618): Check if table function `view` is used in expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20488](https://github.com/ClickHouse/ClickHouse/issues/20488): Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. [#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20886](https://github.com/ClickHouse/ClickHouse/issues/20886): Fix subquery with union distinct and limit clause. close [#20597](https://github.com/ClickHouse/ClickHouse/issues/20597). [#20610](https://github.com/ClickHouse/ClickHouse/pull/20610) ([flynn](https://github.com/ucasfl)). +* Backported in [#20992](https://github.com/ClickHouse/ClickHouse/issues/20992): Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#20988](https://github.com/ClickHouse/ClickHouse/issues/20988): `USE database;` query did not work when using MySQL 5.7 client to connect to ClickHouse server, it's fixed. Fixes [#18926](https://github.com/ClickHouse/ClickHouse/issues/18926). 
[#20878](https://github.com/ClickHouse/ClickHouse/pull/20878) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v21.1.6.13-stable.md b/docs/changelogs/v21.1.6.13-stable.md new file mode 100644 index 00000000000..547cd38a06f --- /dev/null +++ b/docs/changelogs/v21.1.6.13-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.1.6.13-stable FIXME as compared to v21.1.5.4-stable + +#### Bug Fix +* Backported in [#20637](https://github.com/ClickHouse/ClickHouse/issues/20637): Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Backported in [#20573](https://github.com/ClickHouse/ClickHouse/issues/20573): Fix crash which could happen if unknown packet was received from remote query (was introduced in [#17868](https://github.com/ClickHouse/ClickHouse/issues/17868)). [#20547](https://github.com/ClickHouse/ClickHouse/pull/20547) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21005](https://github.com/ClickHouse/ClickHouse/issues/21005): Fix 'Empty task was returned from async task queue' on query cancellation. [#20881](https://github.com/ClickHouse/ClickHouse/pull/20881) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21131](https://github.com/ClickHouse/ClickHouse/issues/21131): Fixed behaviour, when `ALTER MODIFY COLUMN` created mutation, that will knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#21250](https://github.com/ClickHouse/ClickHouse/issues/21250): - Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([Vladimir C](https://github.com/vdimir)). 
+* Backported in [#21068](https://github.com/ClickHouse/ClickHouse/issues/21068): Out of bound memory access was possible when formatting specifically crafted out of range value of type `DateTime64`. This closes [#20494](https://github.com/ClickHouse/ClickHouse/issues/20494). This closes [#20543](https://github.com/ClickHouse/ClickHouse/issues/20543). [#21023](https://github.com/ClickHouse/ClickHouse/pull/21023) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#21229](https://github.com/ClickHouse/ClickHouse/issues/21229): Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed bug that could cause duplicates with insert query (if one of the callbacks came a little too late). [#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#21277](https://github.com/ClickHouse/ClickHouse/issues/21277): Fix bug with `join_use_nulls` and joining `TOTALS` from subqueries. This closes [#19362](https://github.com/ClickHouse/ClickHouse/issues/19362) and [#21137](https://github.com/ClickHouse/ClickHouse/issues/21137). [#21248](https://github.com/ClickHouse/ClickHouse/pull/21248) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.1.7.1-stable.md b/docs/changelogs/v21.1.7.1-stable.md new file mode 100644 index 00000000000..371efb8d5df --- /dev/null +++ b/docs/changelogs/v21.1.7.1-stable.md @@ -0,0 +1,11 @@ +### ClickHouse release v21.1.7.1-stable FIXME as compared to v21.1.6.13-stable + +#### Bug Fix +* Backported in [#21261](https://github.com/ClickHouse/ClickHouse/issues/21261): fix default_replica_path and default_replica_name values are useless on Replicated(*)MergeTree engine when the engine needs specify other parameters. [#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)). 
+* Backported in [#21155](https://github.com/ClickHouse/ClickHouse/issues/21155): fix bug related to cast tuple to map. Closes [#21029](https://github.com/ClickHouse/ClickHouse/issues/21029). [#21120](https://github.com/ClickHouse/ClickHouse/pull/21120) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#21233](https://github.com/ClickHouse/ClickHouse/issues/21233): Now mutations allowed only for table engines that support them (MergeTree family, Memory, MaterializedView). Other engines will report a more clear error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)). +* Backported in [#21428](https://github.com/ClickHouse/ClickHouse/issues/21428): Fix crash in `EXPLAIN` for query with `UNION`. Fixes [#20876](https://github.com/ClickHouse/ClickHouse/issues/20876), [#21170](https://github.com/ClickHouse/ClickHouse/issues/21170). [#21246](https://github.com/ClickHouse/ClickHouse/pull/21246) ([flynn](https://github.com/ucasfl)). +* Backported in [#21408](https://github.com/ClickHouse/ClickHouse/issues/21408): Fix redundant reconnects to ZooKeeper and the possibility of two active sessions for a single clickhouse server. Both problems introduced in [#14678](https://github.com/ClickHouse/ClickHouse/issues/14678). [#21264](https://github.com/ClickHouse/ClickHouse/pull/21264) ([alesapin](https://github.com/alesapin)). +* Backported in [#21549](https://github.com/ClickHouse/ClickHouse/issues/21549): Now `ALTER MODIFY COLUMN` queries will correctly affect changes in partition key, skip indices, TTLs, and so on. Fixes [#13675](https://github.com/ClickHouse/ClickHouse/issues/13675). [#21334](https://github.com/ClickHouse/ClickHouse/pull/21334) ([alesapin](https://github.com/alesapin)). +* Backported in [#21377](https://github.com/ClickHouse/ClickHouse/issues/21377): Fix error `Bad cast from type ... 
to DB::ColumnLowCardinality` while inserting into table with `LowCardinality` column from `Values` format. Fixes [#21140](https://github.com/ClickHouse/ClickHouse/issues/21140). [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.1.8.30-stable.md b/docs/changelogs/v21.1.8.30-stable.md new file mode 100644 index 00000000000..0859cc8ccbd --- /dev/null +++ b/docs/changelogs/v21.1.8.30-stable.md @@ -0,0 +1,29 @@ +### ClickHouse release v21.1.8.30-stable FIXME as compared to v21.1.7.1-stable + +#### Bug Fix +* Backported in [#21205](https://github.com/ClickHouse/ClickHouse/issues/21205): Fix the metadata leak when the Replicated*MergeTree with custom (non default) ZooKeeper cluster is dropped. [#21119](https://github.com/ClickHouse/ClickHouse/pull/21119) ([fastio](https://github.com/fastio)). +* Backported in [#21161](https://github.com/ClickHouse/ClickHouse/issues/21161): Fix `input_format_null_as_default` to take effect when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116). [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#21926](https://github.com/ClickHouse/ClickHouse/issues/21926): Fix Avro format parsing for Kafka. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). +* Backported in [#22189](https://github.com/ClickHouse/ClickHouse/issues/22189): Fixed race on SSL object inside SecureSocket in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#22317](https://github.com/ClickHouse/ClickHouse/issues/22317): Remove unknown columns from joined table in where for queries to external database engines (MySQL, PostgreSQL). 
close [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), close [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), close [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#21797](https://github.com/ClickHouse/ClickHouse/issues/21797): Fix distributed requests cancellation (for example simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22287](https://github.com/ClickHouse/ClickHouse/issues/22287): Start accepting connections after DDLWorker and dictionaries initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21810](https://github.com/ClickHouse/ClickHouse/issues/21810): Fix bug for ReplicatedMerge table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Backported in [#21880](https://github.com/ClickHouse/ClickHouse/issues/21880): Fix possible crashes in aggregate functions with combinator Distinct, while using two-level aggregation. This is a follow-up fix of https://github.com/ClickHouse/ClickHouse/pull/18365 . Can only reproduced in production env. No test case available yet. cc @CurtizJ. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22052](https://github.com/ClickHouse/ClickHouse/issues/22052): Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). 
[#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#21981](https://github.com/ClickHouse/ClickHouse/issues/21981): Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#22465](https://github.com/ClickHouse/ClickHouse/issues/22465): In rare case, merge for `CollapsingMergeTree` may create granule with `index_granularity + 1` rows. Because of this, internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#22149](https://github.com/ClickHouse/ClickHouse/issues/22149): The function `decrypt` was lacking a check for the minimal size of data encrypted in AEAD mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22283](https://github.com/ClickHouse/ClickHouse/issues/22283): Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). +* Backported in [#22280](https://github.com/ClickHouse/ClickHouse/issues/22280): Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. 
Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). +* Backported in [#22501](https://github.com/ClickHouse/ClickHouse/issues/22501): Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22368](https://github.com/ClickHouse/ClickHouse/issues/22368): Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). +* Backported in [#22533](https://github.com/ClickHouse/ClickHouse/issues/22533): Buffer overflow (on read) was possible in `tokenbf_v1` full text index. The excessive bytes are not used but the read operation may lead to crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22462](https://github.com/ClickHouse/ClickHouse/issues/22462): Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the resultset contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22555](https://github.com/ClickHouse/ClickHouse/issues/22555): Fix bug in partial merge join with `LowCardinality`. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). 
[#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22608](https://github.com/ClickHouse/ClickHouse/issues/22608): Fix deserialization of empty string without newline at end of TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without version update: set `input_format_null_as_default` to zero. It was zero in old versions. [#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22578](https://github.com/ClickHouse/ClickHouse/issues/22578): Fix UB by unlocking the rwlock of the TinyLog from the same thread. [#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22650](https://github.com/ClickHouse/ClickHouse/issues/22650): Avoid UB in *Log engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22700](https://github.com/ClickHouse/ClickHouse/issues/22700): Fix wait for mutations on several replicas for ReplicatedMergeTree table engines. Previously, mutation/alter query may finish before mutation actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)). +* Backported in [#22739](https://github.com/ClickHouse/ClickHouse/issues/22739): Fix possible hangs in zk requests in case of OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ diff --git a/docs/changelogs/v21.1.9.41-stable.md b/docs/changelogs/v21.1.9.41-stable.md new file mode 100644 index 00000000000..2a94073b810 --- /dev/null +++ b/docs/changelogs/v21.1.9.41-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.1.9.41-stable FIXME as compared to v21.1.8.30-stable + +#### Improvement +* Backported in [#22817](https://github.com/ClickHouse/ClickHouse/issues/22817): Make FQDN and other DNS related functions work correctly in alpine images. [#20336](https://github.com/ClickHouse/ClickHouse/pull/20336) ([filimonov](https://github.com/filimonov)). +* Backported in [#22810](https://github.com/ClickHouse/ClickHouse/issues/22810): If PODArray was instantiated with element size that is neither a fraction or a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#22967](https://github.com/ClickHouse/ClickHouse/issues/22967): Fix very rare bug when quorum insert with `quorum_parallel=1` is not really "quorum" because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov)). +* Backported in [#22088](https://github.com/ClickHouse/ClickHouse/issues/22088): In case if query has constant `WHERE` condition, and setting `optimize_skip_unused_shards` enabled, all shards may be skipped and query could return incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#21857](https://github.com/ClickHouse/ClickHouse/issues/21857): Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#22758](https://github.com/ClickHouse/ClickHouse/issues/22758): Fix usage of function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)). +* Backported in [#22890](https://github.com/ClickHouse/ClickHouse/issues/22890): Fix approx total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22919](https://github.com/ClickHouse/ClickHouse/issues/22919): Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#22813](https://github.com/ClickHouse/ClickHouse/issues/22813): Allow to start up with modified binary under gdb. In previous version if you set up breakpoint in gdb before start, server will refuse to start up due to failed integrity check. [#21258](https://github.com/ClickHouse/ClickHouse/pull/21258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.10.1.8013-prestable.md b/docs/changelogs/v21.10.1.8013-prestable.md new file mode 100644 index 00000000000..d3e06c056cf --- /dev/null +++ b/docs/changelogs/v21.10.1.8013-prestable.md @@ -0,0 +1,138 @@ +### ClickHouse release v21.10.1.8013-prestable FIXME as compared to v21.9.1.7770-prestable + +#### Backward Incompatible Change +* Fix the issue that in case of some sophisticated query with column aliases identical to the names of expressions, bad cast may happen. This fixes [#25447](https://github.com/ClickHouse/ClickHouse/issues/25447). This fixes [#26914](https://github.com/ClickHouse/ClickHouse/issues/26914). 
This fix may introduce backward incompatibility: if there are different expressions with identical names, exception will be thrown. It may break some rare cases when `enable_optimize_predicate_expression` is set. [#26639](https://github.com/ClickHouse/ClickHouse/pull/26639) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not output trailing zeros in text representation of `Decimal` types. Example: `1.23` will be printed instead of `1.230000` for decimal with scale 6. This closes [#15794](https://github.com/ClickHouse/ClickHouse/issues/15794). It may introduce slight incompatibility if your applications somehow relied on the trailing zeros. Serialization in output formats can be controlled with the setting `output_format_decimal_trailing_zeros`. Implementation of `toString` and casting to String is changed unconditionally. [#27680](https://github.com/ClickHouse/ClickHouse/pull/27680) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now MergeTreeSettings `replicated_max_parallel_sends`, `replicated_max_parallel_sends_for_table`, `replicated_max_parallel_fetches`, `replicated_max_parallel_fetches_for_table` do nothing. They never worked well and were replaced with `max_replicated_fetches_network_bandwidth`, `max_replicated_sends_network_bandwidth` and `background_fetches_pool_size`. [#28404](https://github.com/ClickHouse/ClickHouse/pull/28404) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Generate a unique server uuid when server starts. [#20089](https://github.com/ClickHouse/ClickHouse/pull/20089) ([Bharat Nallan](https://github.com/bharatnc)). +* Added new commands BACKUP and RESTORE. [#21945](https://github.com/ClickHouse/ClickHouse/pull/21945) ([Vitaly Baranov](https://github.com/vitlibar)). +* Partitioned write into s3 table function. [#23051](https://github.com/ClickHouse/ClickHouse/pull/23051) ([Vladimir Chebotarev](https://github.com/excitoon)). 
+* Implementation of short circuit function evaluation, closes [#12587](https://github.com/ClickHouse/ClickHouse/issues/12587). Add settings `short_circuit_function_evaluation` to configure short circuit function evaluation. [#23367](https://github.com/ClickHouse/ClickHouse/pull/23367) ([Kruglov Pavel](https://github.com/Avogar)). +* Add feature for creating user-defined functions. [#23978](https://github.com/ClickHouse/ClickHouse/pull/23978) ([Realist007](https://github.com/Realist007)). +* Add support for INTERSECT, EXCEPT, ANY, ALL operators. [#24757](https://github.com/ClickHouse/ClickHouse/pull/24757) ([Kirill Ershov](https://github.com/zdikov)). +* IDisk interface to store data on web server of static files. Closes [#23982](https://github.com/ClickHouse/ClickHouse/issues/23982). [#25251](https://github.com/ClickHouse/ClickHouse/pull/25251) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Introduce lz4 compression for import / export. [#25310](https://github.com/ClickHouse/ClickHouse/pull/25310) ([Bharat Nallan](https://github.com/bharatnc)). +* Support the case when the data is enclosed in array in JSONAsString input format. Closes [#25517](https://github.com/ClickHouse/ClickHouse/issues/25517). [#25633](https://github.com/ClickHouse/ClickHouse/pull/25633) ([Kruglov Pavel](https://github.com/Avogar)). +* Add new column `last_queue_update_exception` to `system.replicas` table. [#26843](https://github.com/ClickHouse/ClickHouse/pull/26843) ([nvartolomei](https://github.com/nvartolomei)). +* ALTER TABLE ... MATERIALIZE COLUMN. [#27038](https://github.com/ClickHouse/ClickHouse/pull/27038) ([Vladimir Chebotarev](https://github.com/excitoon)). +* - Add replicated storage of user, roles, row policies, quotas and settings profiles through ZooKeeper (experimental). [#27426](https://github.com/ClickHouse/ClickHouse/pull/27426) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Allow positional arguments under setting `enable_positional_arguments`. 
Closes [#2592](https://github.com/ClickHouse/ClickHouse/issues/2592). [#27530](https://github.com/ClickHouse/ClickHouse/pull/27530) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added ComplexKeyRangeHashed dictionary. Closes [#22029](https://github.com/ClickHouse/ClickHouse/issues/22029). [#27629](https://github.com/ClickHouse/ClickHouse/pull/27629) ([Maksim Kita](https://github.com/kitaisreal)). +* add conversion functions between snowflake id and dateTime(dateTime64) Close [#27058](https://github.com/ClickHouse/ClickHouse/issues/27058). [#27704](https://github.com/ClickHouse/ClickHouse/pull/27704) ([jasine](https://github.com/jasine)). +* Add feature for creating user-defined functions as lambda expressions. Syntax `CREATE FUNCTION {function_name} as ({parameters}) -> {function core}`. Example `CREATE FUNCTION plus_one as (a) -> a + 1`. Authors @Realist007. [#27796](https://github.com/ClickHouse/ClickHouse/pull/27796) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `getServerPort` function to allow getting server port. When the port is not used by the server, throw an exception. [#27900](https://github.com/ClickHouse/ClickHouse/pull/27900) ([Amos Bird](https://github.com/amosbird)). +* Accept user settings related to file formats in `SETTINGS` clause in `CREATE` query. This closes [#27580](https://github.com/ClickHouse/ClickHouse/issues/27580). [#28037](https://github.com/ClickHouse/ClickHouse/pull/28037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add a system table of table_views, convenient to query the dependency relationship between tables and views. [#28082](https://github.com/ClickHouse/ClickHouse/pull/28082) ([zhongyuankai](https://github.com/zhongyuankai)). +* Added `executable` storage engine and table function. Authors @ruct. [#28102](https://github.com/ClickHouse/ClickHouse/pull/28102) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `ExecutablePool` storage. 
[#28518](https://github.com/ClickHouse/ClickHouse/pull/28518) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Introducing two checks in `sequenceMatch` and `sequenceCount` that allow for early exit when some deterministic part of the sequence pattern is missing from the events list. This change unlocks many queries that would previously fail due to reaching operations cap, and generally speeds up the pipeline. [#27729](https://github.com/ClickHouse/ClickHouse/pull/27729) ([Jakub Kuklis](https://github.com/jkuklis)). +* Make `hasAll` filter condition leverage bloom filter data-skipping indexes. [#27984](https://github.com/ClickHouse/ClickHouse/pull/27984) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)). +* Speed up sumIf and countIf aggregation functions. [#28272](https://github.com/ClickHouse/ClickHouse/pull/28272) ([Raúl Marín](https://github.com/Algunenano)). +* Enhance primary key analysis with always monotonic information of binary functions, notably non-zero constant division. [#28302](https://github.com/ClickHouse/ClickHouse/pull/28302) ([Amos Bird](https://github.com/amosbird)). + +#### Improvement +* Create virtual projection for `min_max` indices. Now, when `allow_experimental_projection_optimization ` is enabled, queries will use minmax index instead of reading a part when possible. [#26286](https://github.com/ClickHouse/ClickHouse/pull/26286) ([Amos Bird](https://github.com/amosbird)). +* improve Materialize TTL by recalculating ttl.txt only without actual ttl action. [#27019](https://github.com/ClickHouse/ClickHouse/pull/27019) ([lthaooo](https://github.com/lthaooo)). +* Improved the existence condition judgment and empty string node judgment when clickhouse-keeper creates znode. [#27125](https://github.com/ClickHouse/ClickHouse/pull/27125) ([小路](https://github.com/nicelulu)). +* Don't silently ignore errors and don't count delays in `ReadBufferFromS3`. 
[#27484](https://github.com/ClickHouse/ClickHouse/pull/27484) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add `log_queries_probability` setting that allows user to write to query_log only a sample of queries. Closes [#16609](https://github.com/ClickHouse/ClickHouse/issues/16609). [#27527](https://github.com/ClickHouse/ClickHouse/pull/27527) ([Nikolay Degterinsky](https://github.com/evillique)). +* Disable arrayJoin on partition expressions. [#27648](https://github.com/ClickHouse/ClickHouse/pull/27648) ([Raúl Marín](https://github.com/Algunenano)). +* Add `FROM INFILE` command. [#27655](https://github.com/ClickHouse/ClickHouse/pull/27655) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Enables query parameters to be passed in the body of http requests. [#27706](https://github.com/ClickHouse/ClickHouse/pull/27706) ([Hermano Lustosa](https://github.com/hllustosa)). +* Remove duplicate index analysis and avoid possible invalid limit checks during projection analysis. [#27742](https://github.com/ClickHouse/ClickHouse/pull/27742) ([Amos Bird](https://github.com/amosbird)). +* Add aggregate function `quantileBFloat16Weighted` similarly to other quantile...Weighted functions. This closes [#27745](https://github.com/ClickHouse/ClickHouse/issues/27745). [#27758](https://github.com/ClickHouse/ClickHouse/pull/27758) ([Ivan Novitskiy](https://github.com/RedClusive)). +* Now `ALTER MODIFY COLUMN` DataType to `Nullable(DataType)` doesn't require mutation. [#27787](https://github.com/ClickHouse/ClickHouse/pull/27787) ([victorgao](https://github.com/kafka1991)). +* Allow symlinks for library dictionary path. [#27815](https://github.com/ClickHouse/ClickHouse/pull/27815) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add function `SHA512`. [#27830](https://github.com/ClickHouse/ClickHouse/pull/27830) ([zhanglistar](https://github.com/zhanglistar)). +* Use Multipart copy upload for large S3 objects.
[#27858](https://github.com/ClickHouse/ClickHouse/pull/27858) ([ianton-ru](https://github.com/ianton-ru)). +* Improve remote query cancelation (in case of remote server abnormally terminated). [#27881](https://github.com/ClickHouse/ClickHouse/pull/27881) ([Azat Khuzhin](https://github.com/azat)). +* Enable tcp_keep_alive_timeout by default. [#27882](https://github.com/ClickHouse/ClickHouse/pull/27882) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect assertion during writing to StorageKafka. [#27885](https://github.com/ClickHouse/ClickHouse/pull/27885) ([Azat Khuzhin](https://github.com/azat)). +* Support lambda argument for APPLY column transformer which allows applying functions with more than one argument. This is for [#27877](https://github.com/ClickHouse/ClickHouse/issues/27877). [#27901](https://github.com/ClickHouse/ClickHouse/pull/27901) ([Amos Bird](https://github.com/amosbird)). +* Add interactive documentation in `clickhouse-client` about how to reset the password. This is useful in scenario when user has installed ClickHouse, set up the password and instantly forgot it. See [#27750](https://github.com/ClickHouse/ClickHouse/issues/27750). [#27903](https://github.com/ClickHouse/ClickHouse/pull/27903) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to create dictionaries with empty attributes list. [#27905](https://github.com/ClickHouse/ClickHouse/pull/27905) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `replication_wait_for_inactive_replica_timeout` setting. It allows to specify how long to wait for inactive replicas to execute `ALTER`/`OPTIMIZE`/`TRUNCATE` query (default is 120 seconds). If `replication_alter_partitions_sync` is 2 and some replicas are not active for more than `replication_wait_for_inactive_replica_timeout` seconds, then `UNFINISHED` will be thrown. [#27931](https://github.com/ClickHouse/ClickHouse/pull/27931) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add a setting `empty_result_for_aggregation_by_constant_keys_on_empty_set` to control the behavior of grouping by constant keys on empty set. This is to bring back the old behavior of [#6842](https://github.com/ClickHouse/ClickHouse/issues/6842). [#27932](https://github.com/ClickHouse/ClickHouse/pull/27932) ([Amos Bird](https://github.com/amosbird)). +* Lower restrictions for Enum data type to allow attaching compatible data. Closes [#26672](https://github.com/ClickHouse/ClickHouse/issues/26672). [#28028](https://github.com/ClickHouse/ClickHouse/pull/28028) ([Dmitry Novik](https://github.com/novikd)). +* Support ON CONFLICT clause when inserting into PostgreSQL table engine or table function. Closes [#27727](https://github.com/ClickHouse/ClickHouse/issues/27727). [#28081](https://github.com/ClickHouse/ClickHouse/pull/28081) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support implicit conversions between index in operator `[]` and key of type `Map` (e.g. different `Int` types, `String` and `FixedString`). [#28096](https://github.com/ClickHouse/ClickHouse/pull/28096) ([Anton Popov](https://github.com/CurtizJ)). +* Enable optimize_distributed_group_by_sharding_key by default. [#28105](https://github.com/ClickHouse/ClickHouse/pull/28105) ([Azat Khuzhin](https://github.com/azat)). +* Fix `zookeeper_log.address` (before the first patch in this PR the address was always `::`) and reduce number of calls `getpeername(2)` for this column (since each time entry for `zookeeper_log` is added `getpeername()` is called, cache this address in the zookeeper client to avoid this). [#28212](https://github.com/ClickHouse/ClickHouse/pull/28212) ([Azat Khuzhin](https://github.com/azat)). +* Fix removing of parts in a Temporary state which can lead to an unexpected exception (`Part %name% doesn't exist`). Fixes [#23661](https://github.com/ClickHouse/ClickHouse/issues/23661).
[#28221](https://github.com/ClickHouse/ClickHouse/pull/28221) ([Azat Khuzhin](https://github.com/azat)). +* Added libhdfs3_conf in server config instead of export env LIBHDFS3_CONF in clickhouse-server.service. [#28268](https://github.com/ClickHouse/ClickHouse/pull/28268) ([Zhichang Yu](https://github.com/yuzhichang)). +* Use real tmp file instead of predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)). +* Speed up data parts loading by delaying table startup process. [#28313](https://github.com/ClickHouse/ClickHouse/pull/28313) ([Amos Bird](https://github.com/amosbird)). +* Allow ssl connection for RabbitMQ engine. [#28365](https://github.com/ClickHouse/ClickHouse/pull/28365) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix removing of parts in a Temporary state (follow up for [#28221](https://github.com/ClickHouse/ClickHouse/issues/28221)). [#28366](https://github.com/ClickHouse/ClickHouse/pull/28366) ([Azat Khuzhin](https://github.com/azat)). +* Do not allow creating StorageMaterializedPostgreSQL with bad arguments. Closes [#28423](https://github.com/ClickHouse/ClickHouse/issues/28423). [#28430](https://github.com/ClickHouse/ClickHouse/pull/28430) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Introduce `connection_wait_timeout` (default to 5 seconds, 0 - do not wait) setting for MySQL engine. [#28474](https://github.com/ClickHouse/ClickHouse/pull/28474) ([Azat Khuzhin](https://github.com/azat)). +* Fix strange sessions expiration logic in Keeper. Probably it should help in CI: https://clickhouse-test-reports.s3.yandex.net/0/6bd9b82141c98dcd7796fd9d08326831095ba519/stress_test_(debug).html#fail1. [#28519](https://github.com/ClickHouse/ClickHouse/pull/28519) ([alesapin](https://github.com/alesapin)). +* To be added. Closes [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). 
[#28614](https://github.com/ClickHouse/ClickHouse/pull/28614) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Bugfix for windowFunnel's "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). [#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)). +* - Fix bug with aliased column in `Distributed` table. [#27652](https://github.com/ClickHouse/ClickHouse/pull/27652) ([Vladimir C](https://github.com/vdimir)). +* Fixed another case of `Unexpected merged part ... intersecting drop range ...` error. [#27656](https://github.com/ClickHouse/ClickHouse/pull/27656) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bad type cast when functions like `arrayHas` are applied to arrays of LowCardinality of Nullable of different non-numeric types like `DateTime` and `DateTime64`. In previous versions bad cast occurs. In new version it will lead to exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix column filtering with union distinct in subquery. Closes [#27578](https://github.com/ClickHouse/ClickHouse/issues/27578). [#27689](https://github.com/ClickHouse/ClickHouse/pull/27689) ([Kseniia Sumarokova](https://github.com/kssenii)). +* After https://github.com/ClickHouse/ClickHouse/pull/26384. To execute `GRANT WITH REPLACE OPTION` now the current user should have `GRANT OPTION` for access rights it's going to grant AND for access rights it's going to revoke. [#27701](https://github.com/ClickHouse/ClickHouse/pull/27701) ([Vitaly Baranov](https://github.com/vitlibar)). +* After https://github.com/ClickHouse/ClickHouse/pull/25687. Add backquotes for the default database shown in CREATE USER. [#27702](https://github.com/ClickHouse/ClickHouse/pull/27702) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Remove duplicated source files in CMakeLists.txt in arrow-cmake. [#27736](https://github.com/ClickHouse/ClickHouse/pull/27736) ([李扬](https://github.com/taiyang-li)). +* Fix possible crash when asynchronous connection draining is enabled and hedged connection is disabled. [#27774](https://github.com/ClickHouse/ClickHouse/pull/27774) ([Amos Bird](https://github.com/amosbird)). +* Prevent crashes for some formats when NULL (tombstone) message was coming from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)). +* Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). +* Fix a couple of bugs that may cause replicas to diverge. [#27808](https://github.com/ClickHouse/ClickHouse/pull/27808) ([Alexander Tokmakov](https://github.com/tavplubix)). +* After https://github.com/ClickHouse/ClickHouse/pull/26864. Fix shutdown of `NamedSessionStorage`: session contexts stored in `NamedSessionStorage` are now destroyed before destroying the global context. [#27875](https://github.com/ClickHouse/ClickHouse/pull/27875) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). +* Fix selecting with extremes from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)). +* Check cluster name before creating Distributed table, do not allow to create a table with incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). 
[#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix checking access grants when executing GRANT WITH REPLACE statement with ON CLUSTER clause. This PR improves fix https://github.com/ClickHouse/ClickHouse/pull/27701. [#27983](https://github.com/ClickHouse/ClickHouse/pull/27983) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix cases, when read buffer fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([Filatenkov Artur](https://github.com/FArthur-cmd)). + +#### Build/Testing/Packaging Improvement +* Enable Thread Fuzzer in Stress Test. Thread Fuzzer is ClickHouse feature that allows to test more permutations of thread scheduling and discover more potential issues. This closes [#9813](https://github.com/ClickHouse/ClickHouse/issues/9813). This closes [#9814](https://github.com/ClickHouse/ClickHouse/issues/9814). This closes [#9515](https://github.com/ClickHouse/ClickHouse/issues/9515). This closes [#9516](https://github.com/ClickHouse/ClickHouse/issues/9516). [#27538](https://github.com/ClickHouse/ClickHouse/pull/27538) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for build with `clang-13`. This closes [#27705](https://github.com/ClickHouse/ClickHouse/issues/27705). [#27714](https://github.com/ClickHouse/ClickHouse/pull/27714) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve support for build with `clang-13`. [#27777](https://github.com/ClickHouse/ClickHouse/pull/27777) ([Sergei Semin](https://github.com/syominsergey)). +* Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). 
+* Print out git status information at CMake configure stage. [#28047](https://github.com/ClickHouse/ClickHouse/pull/28047) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)). +* Add new log level `` for testing environments. [#28559](https://github.com/ClickHouse/ClickHouse/pull/28559) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix handling null value with type of Nullable(String) in function JSONExtract. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930) . This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). +* Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). +* Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). +* Fix bug in clickhouse-keeper which can lead to endless logs when `rotate_logs_interval` decreased. [#28152](https://github.com/ClickHouse/ClickHouse/pull/28152) ([alesapin](https://github.com/alesapin)). +* Multiple small fixes for projections. See detailed description in pr. [#28178](https://github.com/ClickHouse/ClickHouse/pull/28178) ([Amos Bird](https://github.com/amosbird)). +* Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) triggered with `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)). 
+* Fix a rare bug in `clickhouse-keeper` when the client can receive a watch response before request-response. [#28197](https://github.com/ClickHouse/ClickHouse/pull/28197) ([alesapin](https://github.com/alesapin)). +* Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and extremes. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix reading of custom TLD w/o new line at EOF. [#28213](https://github.com/ClickHouse/ClickHouse/pull/28213) ([Azat Khuzhin](https://github.com/azat)). +* Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). +* Fix intersecting parts due to new part had been replaced with an empty part. [#28310](https://github.com/ClickHouse/ClickHouse/pull/28310) ([Azat Khuzhin](https://github.com/azat)). +* Fix NOT-IN index optimization when not all key columns are used. This fixes [#28120](https://github.com/ClickHouse/ClickHouse/issues/28120). [#28315](https://github.com/ClickHouse/ClickHouse/pull/28315) ([Amos Bird](https://github.com/amosbird)). +* Fix non joined rows from nullable column. Close [#27691](https://github.com/ClickHouse/ClickHouse/issues/27691). [#28349](https://github.com/ClickHouse/ClickHouse/pull/28349) ([Vladimir C](https://github.com/vdimir)). +* Fix rare case when changes of `clickhouse-keeper` settings may lead to lost logs and server hung. [#28360](https://github.com/ClickHouse/ClickHouse/pull/28360) ([alesapin](https://github.com/alesapin)). +* Fix lack of quotes for table names in MaterializedPostgreSQL engine. Closes [#28316](https://github.com/ClickHouse/ClickHouse/issues/28316). 
[#28433](https://github.com/ClickHouse/ClickHouse/pull/28433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed possible ZooKeeper watches leak on background processing of distributed DDL queue. Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). +* Fix `There is no subcolumn` error, while select from tables, which have `Nested` columns and scalar columns with dot in name and the same prefix as `Nested` (e.g. `n.id UInt32, n.arr1 Array(UInt64), n.arr2 Array(UInt64)`). [#28531](https://github.com/ClickHouse/ClickHouse/pull/28531) ([Anton Popov](https://github.com/CurtizJ)). +* Fix UUID overlap in DROP TABLE for internal DDL from MaterializeMySQL. [#28533](https://github.com/ClickHouse/ClickHouse/pull/28533) ([Azat Khuzhin](https://github.com/azat)). +* Fix endless loop for truncated bzip2 archive. [#28543](https://github.com/ClickHouse/ClickHouse/pull/28543) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Add executable table function'. [#23192](https://github.com/ClickHouse/ClickHouse/pull/23192) ([ruct](https://github.com/ruct)). +* NO CL ENTRY: 'DOCSUP-12413: macros support in functions cluster and clusterAllReplicas'. [#27759](https://github.com/ClickHouse/ClickHouse/pull/27759) ([olgarev](https://github.com/olgarev)). +* NO CL ENTRY: 'Revert "less sys calls #2: make vdso work again"'. [#27829](https://github.com/ClickHouse/ClickHouse/pull/27829) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* NO CL ENTRY: 'Revert "Do not miss exceptions from the ThreadPool"'. [#27844](https://github.com/ClickHouse/ClickHouse/pull/27844) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Improve 01730_distributed_group_by_no_merge_order_by_long"'. [#28128](https://github.com/ClickHouse/ClickHouse/pull/28128) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Revert "less sys calls #2: make vdso work again""'. [#28132](https://github.com/ClickHouse/ClickHouse/pull/28132) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Update src/Functions/GatherUtils/Sources.h'. [#28186](https://github.com/ClickHouse/ClickHouse/pull/28186) ([sdk2](https://github.com/sdk2)). +* NO CL ENTRY: 'Revert "Add test for [#13398](https://github.com/ClickHouse/ClickHouse/issues/13398)"'. [#28274](https://github.com/ClickHouse/ClickHouse/pull/28274) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'fix minor typo'. [#28629](https://github.com/ClickHouse/ClickHouse/pull/28629) ([flynn](https://github.com/ucasfl)). + diff --git a/docs/changelogs/v21.10.2.15-stable.md b/docs/changelogs/v21.10.2.15-stable.md new file mode 100644 index 00000000000..05e278f03ae --- /dev/null +++ b/docs/changelogs/v21.10.2.15-stable.md @@ -0,0 +1,65 @@ +### ClickHouse release v21.10.2.15-stable FIXME as compared to v21.10.1.8013-prestable + +#### Improvement +* Backported in [#29944](https://github.com/ClickHouse/ClickHouse/issues/29944): Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#29776](https://github.com/ClickHouse/ClickHouse/issues/29776): Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns=0`:. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#28792](https://github.com/ClickHouse/ClickHouse/issues/28792): Fix transformation of disjunctions chain to `IN` (controlled by settings `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#29127](https://github.com/ClickHouse/ClickHouse/issues/29127): Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization. [#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)). +* Backported in [#29970](https://github.com/ClickHouse/ClickHouse/issues/29970): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#30051](https://github.com/ClickHouse/ClickHouse/issues/30051): Fix releasing query ID and session ID at the end of query processing while handing gRPC call. This PR fixes flaky test [test_grpc_protocol/test.py::test_session](https://clickhouse-test-reports.s3.yandex.net/0/1ac03811a2df9717fa7c633d1af03def821d24b6/integration_tests_(memory).html). [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29054](https://github.com/ClickHouse/ClickHouse/issues/29054): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. 
[#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28795](https://github.com/ClickHouse/ClickHouse/issues/28795): - Fix the number of arguments required by s2RectAdd and s2RectContains functions. [#28663](https://github.com/ClickHouse/ClickHouse/pull/28663) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#28794](https://github.com/ClickHouse/ClickHouse/issues/28794): Add Settings.Names, Settings.Values aliases for system.processes table. [#28685](https://github.com/ClickHouse/ClickHouse/pull/28685) ([Vitaly Orlov](https://github.com/orloffv)). +* Backported in [#28793](https://github.com/ClickHouse/ClickHouse/issues/28793): Fix the coredump in the creation of distributed tables, when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)). +* Backported in [#28812](https://github.com/ClickHouse/ClickHouse/issues/28812): Fix possible crash for `SELECT` with partially created aggregate projection in case of exception. [#28700](https://github.com/ClickHouse/ClickHouse/pull/28700) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28790](https://github.com/ClickHouse/ClickHouse/issues/28790): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). +* Backported in [#29150](https://github.com/ClickHouse/ClickHouse/issues/29150): Fix extremely rare case when ReplicatedMergeTree replicas can diverge after hard reboot of all replicas. The error looks like `Part ... intersects (previous|next) part ...`. [#28817](https://github.com/ClickHouse/ClickHouse/pull/28817) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#28843](https://github.com/ClickHouse/ClickHouse/issues/28843): Fix expressions compilation with short circuit evaluation. [#28821](https://github.com/ClickHouse/ClickHouse/pull/28821) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#28992](https://github.com/ClickHouse/ClickHouse/issues/28992): Fixed a race condition between `DROP PART` and `REPLACE/MOVE PARTITION` that might cause replicas to diverge in rare cases. [#28864](https://github.com/ClickHouse/ClickHouse/pull/28864) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28950](https://github.com/ClickHouse/ClickHouse/issues/28950): Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28926](https://github.com/ClickHouse/ClickHouse/issues/28926): Fix bug with LowCardinality in short-circuit function evaluation. Closes [#28884](https://github.com/ClickHouse/ClickHouse/issues/28884). [#28887](https://github.com/ClickHouse/ClickHouse/pull/28887) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#29162](https://github.com/ClickHouse/ClickHouse/issues/29162): Fix queries to external databases (e.g. MySQL) with multiple columns in IN (e.g. `(k,v) IN ((1, 2))`) (but note that this has some backward incompatibility for the `clickhouse-copier` since it uses alias for tuple element). [#28888](https://github.com/ClickHouse/ClickHouse/pull/28888) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29108](https://github.com/ClickHouse/ClickHouse/issues/29108): Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#28928](https://github.com/ClickHouse/ClickHouse/issues/28928): Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#28951](https://github.com/ClickHouse/ClickHouse/issues/28951): Fix bad optimizations of ORDER BY if it contains WITH FILL. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049). [#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#29023](https://github.com/ClickHouse/ClickHouse/issues/29023): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in single threads since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29814](https://github.com/ClickHouse/ClickHouse/issues/29814): Do not allow to reuse previous credentials in case of inter-server secret (Before INSERT via Buffer/Kafka to Distributed table with interserver secret configured for that cluster, may re-use previously set user for that connection). [#29060](https://github.com/ClickHouse/ClickHouse/pull/29060) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29192](https://github.com/ClickHouse/ClickHouse/issues/29192): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Backported in [#29400](https://github.com/ClickHouse/ClickHouse/issues/29400): Send normal `Database doesn't exist error` (`UNKNOWN_DATABASE`) to the client (via TCP) instead of `Attempt to read after eof` (`ATTEMPT_TO_READ_AFTER_EOF`). [#29229](https://github.com/ClickHouse/ClickHouse/pull/29229) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29358](https://github.com/ClickHouse/ClickHouse/issues/29358): Fix possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of table columns have default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#29450](https://github.com/ClickHouse/ClickHouse/issues/29450): Fix failed assertion in ReadBufferFromHDFS. Update libhdfs3 library to be able to run in tests in debug. Closes [#29251](https://github.com/ClickHouse/ClickHouse/issues/29251). Closes [#27814](https://github.com/ClickHouse/ClickHouse/issues/27814). [#29276](https://github.com/ClickHouse/ClickHouse/pull/29276) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29302](https://github.com/ClickHouse/ClickHouse/issues/29302): Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29382](https://github.com/ClickHouse/ClickHouse/issues/29382): Remove window function `nth_value` as it is not memory-safe. This closes [#29347](https://github.com/ClickHouse/ClickHouse/issues/29347). [#29348](https://github.com/ClickHouse/ClickHouse/pull/29348) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#29439](https://github.com/ClickHouse/ClickHouse/issues/29439): Fix replicated access storage not shutting down cleanly when misconfigured. 
[#29388](https://github.com/ClickHouse/ClickHouse/pull/29388) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#29488](https://github.com/ClickHouse/ClickHouse/issues/29488): Fix Logical error `Cannot capture columns` in functions greatest/least. Closes [#29334](https://github.com/ClickHouse/ClickHouse/issues/29334). [#29454](https://github.com/ClickHouse/ClickHouse/pull/29454) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#29537](https://github.com/ClickHouse/ClickHouse/issues/29537): Fix possible `Block structure mismatch` for subqueries with pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29590](https://github.com/ClickHouse/ClickHouse/issues/29590): In ODBC bridge add retries for error Invalid cursor state. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29571](https://github.com/ClickHouse/ClickHouse/issues/29571): Fix bug in check `pathStartsWith` because there was a bug with the usage of `std::mismatch`: `The behavior is undefined if the second range is shorter than the first range.`. [#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29725](https://github.com/ClickHouse/ClickHouse/issues/29725): Fix null dereference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#29629](https://github.com/ClickHouse/ClickHouse/issues/29629): Fix rare segfault in `ALTER MODIFY` query when using incorrect table identifier in `DEFAULT` expression like `x.y.z...` Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)). +* Backported in [#29657](https://github.com/ClickHouse/ClickHouse/issues/29657): Fix JIT expression compilation with aliases and short-circuit expression evaluation. Closes [#29403](https://github.com/ClickHouse/ClickHouse/issues/29403). [#29574](https://github.com/ClickHouse/ClickHouse/pull/29574) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#29749](https://github.com/ClickHouse/ClickHouse/issues/29749): Condition in filter predicate could be lost after push-down optimisation. [#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29850](https://github.com/ClickHouse/ClickHouse/issues/29850): Fix concurrent access to `LowCardinality` during `GROUP BY` (leads to SIGSEGV). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29910](https://github.com/ClickHouse/ClickHouse/issues/29910): Fix bad cast in `ATTACH TABLE ... FROM 'path'` query when non-string literal is used instead of path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#29866](https://github.com/ClickHouse/ClickHouse/issues/29866): Avoid `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in kernel. Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). 
[#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30023](https://github.com/ClickHouse/ClickHouse/issues/30023): MaterializedMySQL: Fix an issue where if the connection to MySQL was lost, only parts of a transaction could be processed. [#29837](https://github.com/ClickHouse/ClickHouse/pull/29837) ([Håvard Kvålen](https://github.com/havardk)). +* Backported in [#29876](https://github.com/ClickHouse/ClickHouse/issues/29876): Fix system tables recreation check (fails to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30058](https://github.com/ClickHouse/ClickHouse/issues/30058): Fix potential resource leak of the concurrent query limit of merge tree tables introduced in https://github.com/ClickHouse/ClickHouse/pull/19544 . [#29879](https://github.com/ClickHouse/ClickHouse/pull/29879) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#30208](https://github.com/ClickHouse/ClickHouse/issues/30208): Fix data-race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30213](https://github.com/ClickHouse/ClickHouse/issues/30213): Fix possible data-race between `FileChecker` and `StorageLog`/`StorageStripeLog`. [#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30067](https://github.com/ClickHouse/ClickHouse/issues/30067): Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)). 
+* Backported in [#30127](https://github.com/ClickHouse/ClickHouse/issues/30127): Dropped `Memory` database might reappear after server restart, it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). Also added `force_remove_data_recursively_on_drop` setting as a workaround for `Directory not empty` error when dropping `Ordinary` database (because it's not possible to remove data leftovers manually in cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30232](https://github.com/ClickHouse/ClickHouse/issues/30232): Fix INSERT SELECT incorrectly fills MATERIALIZED column based on Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30264](https://github.com/ClickHouse/ClickHouse/issues/30264): FlatDictionary, HashedDictionary fix bytes_allocated calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#30307](https://github.com/ClickHouse/ClickHouse/issues/30307): Fix crash with shortcircuit and lowcardinality in multiIf. [#30243](https://github.com/ClickHouse/ClickHouse/pull/30243) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#30291](https://github.com/ClickHouse/ClickHouse/issues/30291): Fix ComplexKeyHashedDictionary, ComplexKeySparseHashedDictionary parsing `preallocate` option from layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NO CL CATEGORY + +* Avoid deadlocks when reading and writing on JOIN Engine tables at the same time. [#30182](https://github.com/ClickHouse/ClickHouse/pull/30182) ([Raúl Marín](https://github.com/Algunenano)). 
+ diff --git a/docs/changelogs/v21.10.3.9-stable.md b/docs/changelogs/v21.10.3.9-stable.md new file mode 100644 index 00000000000..78240367d55 --- /dev/null +++ b/docs/changelogs/v21.10.3.9-stable.md @@ -0,0 +1,45 @@ +### ClickHouse release v21.10.3.9-stable FIXME as compared to v21.10.2.15-stable + +#### New Feature +* Backported in [#30712](https://github.com/ClickHouse/ClickHouse/issues/30712): CompiledExpressionCache limit elements size using `compiled_expression_cache_elements_size` setting. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Improvement +* Backported in [#30453](https://github.com/ClickHouse/ClickHouse/issues/30453): Allow symlinks to files in user_files directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#30635](https://github.com/ClickHouse/ClickHouse/issues/30635): More full support of positional arguments. [#30433](https://github.com/ClickHouse/ClickHouse/pull/30433) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#30620](https://github.com/ClickHouse/ClickHouse/issues/30620): Fix reading from empty file on encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#31369](https://github.com/ClickHouse/ClickHouse/issues/31369): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#30915](https://github.com/ClickHouse/ClickHouse/issues/30915): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. 
[#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#30824](https://github.com/ClickHouse/ClickHouse/issues/30824): Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30766](https://github.com/ClickHouse/ClickHouse/issues/30766): Fix hanging DDL queries on Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#30504](https://github.com/ClickHouse/ClickHouse/issues/30504): Fixed incorrect behaviour of setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#30137](https://github.com/ClickHouse/ClickHouse/issues/30137): Fix error `Port is already connected` for queries with `GLOBAL IN` and `WITH TOTALS`. Only for 21.9 and 21.10. [#30086](https://github.com/ClickHouse/ClickHouse/pull/30086) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#30465](https://github.com/ClickHouse/ClickHouse/issues/30465): Support nullable arguments in function `initializeAggregation`. [#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#30353](https://github.com/ClickHouse/ClickHouse/issues/30353): Fix `pread_fake_async`/`pread_threadpool` with `min_bytes_to_use_direct_io`. [#30191](https://github.com/ClickHouse/ClickHouse/pull/30191) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#30334](https://github.com/ClickHouse/ClickHouse/issues/30334): Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#30654](https://github.com/ClickHouse/ClickHouse/issues/30654): Fix `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#30378](https://github.com/ClickHouse/ClickHouse/issues/30378): Fix `replaceRegexpAll` bug. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)). +* Backported in [#30526](https://github.com/ClickHouse/ClickHouse/issues/30526): Fixed segfault which might happen if session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30587](https://github.com/ClickHouse/ClickHouse/issues/30587): Fix deadlock on ALTER with scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#30608](https://github.com/ClickHouse/ClickHouse/issues/30608): Limit push down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438). [#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#30747](https://github.com/ClickHouse/ClickHouse/issues/30747): Update aws-sdk submodule for throttling in Yandex.S3. [#30646](https://github.com/ClickHouse/ClickHouse/pull/30646) ([ianton-ru](https://github.com/ianton-ru)). 
+* Backported in [#30751](https://github.com/ClickHouse/ClickHouse/issues/30751): Functions for case-insensitive search in UTF8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually does not match, it's fixed. [#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30709](https://github.com/ClickHouse/ClickHouse/issues/30709): Fix PREWHERE with WHERE in case of always true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30771](https://github.com/ClickHouse/ClickHouse/issues/30771): Fixed a race condition between `REPLACE/MOVE PARTITION` and background merge in non-replicated `MergeTree` that might cause a part of moved/replaced data to remain in partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30859](https://github.com/ClickHouse/ClickHouse/issues/30859): Fixed ambiguity when extracting auxiliary ZooKeeper name from ZooKeeper path in `ReplicatedMergeTree`. Previously server might fail to start with `Unknown auxiliary ZooKeeper name` if ZooKeeper path contains a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). Also it was allowed to specify ZooKeeper path that does not start with slash, but now it's deprecated and creation of new tables with such path is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed too. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30926](https://github.com/ClickHouse/ClickHouse/issues/30926): Fix set index not used in AND/OR expressions when there are more than two operands. 
This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416) . [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#31289](https://github.com/ClickHouse/ClickHouse/issues/31289): Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31152](https://github.com/ClickHouse/ClickHouse/issues/31152): Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Backported in [#31041](https://github.com/ClickHouse/ClickHouse/issues/31041): Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31203](https://github.com/ClickHouse/ClickHouse/issues/31203): Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31373](https://github.com/ClickHouse/ClickHouse/issues/31373): Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Backported in [#31255](https://github.com/ClickHouse/ClickHouse/issues/31255): Fix bug in Keeper which can lead to inability to start when some coordination logs was lost and we have more fresh snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Backported in [#31436](https://github.com/ClickHouse/ClickHouse/issues/31436): Fix bug with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v21.10.4.26-stable.md b/docs/changelogs/v21.10.4.26-stable.md new file mode 100644 index 00000000000..b2089fad0c4 --- /dev/null +++ b/docs/changelogs/v21.10.4.26-stable.md @@ -0,0 +1,25 @@ +### ClickHouse release v21.10.4.26-stable FIXME as compared to v21.10.3.9-stable + +#### Performance Improvement +* Backported in [#31731](https://github.com/ClickHouse/ClickHouse/issues/31731): Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#31573](https://github.com/ClickHouse/ClickHouse/issues/31573): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#31518](https://github.com/ClickHouse/ClickHouse/issues/31518): Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). 
+* Backported in [#31554](https://github.com/ClickHouse/ClickHouse/issues/31554): Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#31579](https://github.com/ClickHouse/ClickHouse/issues/31579): Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#31745](https://github.com/ClickHouse/ClickHouse/issues/31745): `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31794](https://github.com/ClickHouse/ClickHouse/issues/31794): Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31830](https://github.com/ClickHouse/ClickHouse/issues/31830): Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31815](https://github.com/ClickHouse/ClickHouse/issues/31815): Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. 
[#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32016](https://github.com/ClickHouse/ClickHouse/issues/32016): Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31762](https://github.com/ClickHouse/ClickHouse/issues/31762): Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#31892](https://github.com/ClickHouse/ClickHouse/issues/31892): Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32031](https://github.com/ClickHouse/ClickHouse/issues/32031): Fix invalid cast of nullable type when nullable primary key is used. This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#32074](https://github.com/ClickHouse/ClickHouse/issues/32074): Fix a bug about function transform with decimal args. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([Shuai li](https://github.com/loneylee)). +* Backported in [#31939](https://github.com/ClickHouse/ClickHouse/issues/31939): - Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler` - Same with `operation_timeout`. 
[#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). +* Backported in [#31909](https://github.com/ClickHouse/ClickHouse/issues/31909): Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v21.10.5.3-stable.md b/docs/changelogs/v21.10.5.3-stable.md new file mode 100644 index 00000000000..a591c4a07a8 --- /dev/null +++ b/docs/changelogs/v21.10.5.3-stable.md @@ -0,0 +1,15 @@ +### ClickHouse release v21.10.5.3-stable FIXME as compared to v21.10.4.26-stable + +#### Bug Fix +* Backported in [#32252](https://github.com/ClickHouse/ClickHouse/issues/32252): Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#32346](https://github.com/ClickHouse/ClickHouse/issues/32346): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32151](https://github.com/ClickHouse/ClickHouse/issues/32151): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#32093](https://github.com/ClickHouse/ClickHouse/issues/32093): Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32147](https://github.com/ClickHouse/ClickHouse/issues/32147): Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32216](https://github.com/ClickHouse/ClickHouse/issues/32216): Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32352](https://github.com/ClickHouse/ClickHouse/issues/32352): Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32393](https://github.com/ClickHouse/ClickHouse/issues/32393): Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)). 
+ diff --git a/docs/changelogs/v21.10.6.2-stable.md b/docs/changelogs/v21.10.6.2-stable.md new file mode 100644 index 00000000000..da146ee364d --- /dev/null +++ b/docs/changelogs/v21.10.6.2-stable.md @@ -0,0 +1,22 @@ +### ClickHouse release v21.10.6.2-stable FIXME as compared to v21.10.5.3-stable + +#### Bug Fix +* Backported in [#32692](https://github.com/ClickHouse/ClickHouse/issues/32692): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32680](https://github.com/ClickHouse/ClickHouse/issues/32680): Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#32285](https://github.com/ClickHouse/ClickHouse/issues/32285): Dictionaries fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32730](https://github.com/ClickHouse/ClickHouse/issues/32730): Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32313](https://github.com/ClickHouse/ClickHouse/issues/32313): XML dictionaries identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#32539](https://github.com/ClickHouse/ClickHouse/issues/32539): Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32709](https://github.com/ClickHouse/ClickHouse/issues/32709): Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#32567](https://github.com/ClickHouse/ClickHouse/issues/32567): Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#32794](https://github.com/ClickHouse/ClickHouse/issues/32794): fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). +* Backported in [#33182](https://github.com/ClickHouse/ClickHouse/issues/33182): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#33655](https://github.com/ClickHouse/ClickHouse/issues/33655): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release): + +* Backported in [#32657](https://github.com/ClickHouse/ClickHouse/issues/32657): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.11.1.8636-prestable.md b/docs/changelogs/v21.11.1.8636-prestable.md new file mode 100644 index 00000000000..ade8084055c --- /dev/null +++ b/docs/changelogs/v21.11.1.8636-prestable.md @@ -0,0 +1,196 @@ +### ClickHouse release v21.11.1.8636-prestable FIXME as compared to v21.10.1.8013-prestable + +#### Backward Incompatible Change +* Now ClickHouse Keeper snapshots compressed with `ZSTD` codec by default instead of custom ClickHouse LZ4 block compression. This behavior can be turned off with `compress_snapshots_with_zstd_format` coordination setting (must be equal on all quorum replicas). Backward incompatibility is quite rare and may happen only when new node will send snapshot (happens in case of recovery) to the old node which is unable to read snapshots in ZSTD format. [#29417](https://github.com/ClickHouse/ClickHouse/pull/29417) ([alesapin](https://github.com/alesapin)). +* Function `bayesAB` is removed. This closes [#26233](https://github.com/ClickHouse/ClickHouse/issues/26233). [#29934](https://github.com/ClickHouse/ClickHouse/pull/29934) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove MergeTree table setting `write_final_mark`. 
It will be always `true`. [#30455](https://github.com/ClickHouse/ClickHouse/pull/30455) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change order of json_path and json in sql json function (to be correct). Closes [#30449](https://github.com/ClickHouse/ClickHouse/issues/30449). [#30474](https://github.com/ClickHouse/ClickHouse/pull/30474) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove setting odbc_max_field_size because it is not used anymore. Closes [#30413](https://github.com/ClickHouse/ClickHouse/issues/30413). [#30778](https://github.com/ClickHouse/ClickHouse/pull/30778) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### New Feature +* Add CapnProto output format, refactor CapnProto input format. [#29291](https://github.com/ClickHouse/ClickHouse/pull/29291) ([Kruglov Pavel](https://github.com/Avogar)). +* clickhouse-format support option `--query`. [#29325](https://github.com/ClickHouse/ClickHouse/pull/29325) ([凌涛](https://github.com/lingtaolf)). +* Users now can set comments to database in `CREATE DATABASE` statement ... [#29429](https://github.com/ClickHouse/ClickHouse/pull/29429) ([Vasily Nemkov](https://github.com/Enmk)). +* New function` mapContainsKeyLike` to get the map that key matches a simple regular expression. [#29471](https://github.com/ClickHouse/ClickHouse/pull/29471) ([凌涛](https://github.com/lingtaolf)). +* Huawei OBS Storage support. Closes [#24294](https://github.com/ClickHouse/ClickHouse/issues/24294). [#29511](https://github.com/ClickHouse/ClickHouse/pull/29511) ([kevin wan](https://github.com/MaxWk)). +* Clickhouse HTTP Server can enable HSTS by set `hsts_max_age` in config.xml with a positive number. [#29516](https://github.com/ClickHouse/ClickHouse/pull/29516) ([凌涛](https://github.com/lingtaolf)). +* - Added MD4 and SHA384 functions. [#29602](https://github.com/ClickHouse/ClickHouse/pull/29602) ([Nikita Tikhomirov](https://github.com/NSTikhomirov)). +* Support EXISTS(subquery). 
Closes [#6852](https://github.com/ClickHouse/ClickHouse/issues/6852). [#29731](https://github.com/ClickHouse/ClickHouse/pull/29731) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added function `ngram`. Closes [#29699](https://github.com/ClickHouse/ClickHouse/issues/29699). [#29738](https://github.com/ClickHouse/ClickHouse/pull/29738) ([Maksim Kita](https://github.com/kitaisreal)). +* Returns String with OS Kernel version. [#29755](https://github.com/ClickHouse/ClickHouse/pull/29755) ([Memo](https://github.com/Joeywzr)). +* Predefined configuration for table function remote. Closes [#29756](https://github.com/ClickHouse/ClickHouse/issues/29756). [#29774](https://github.com/ClickHouse/ClickHouse/pull/29774) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `table` alias to system.tables and `database` alias to system.databases [#29677](https://github.com/ClickHouse/ClickHouse/issues/29677). [#29882](https://github.com/ClickHouse/ClickHouse/pull/29882) ([kevin wan](https://github.com/MaxWk)). +* Added columns `data_compressed_bytes`, `data_uncompressed_bytes`, `marks_bytes` into `system.data_skipping_indices`. Added columns `secondary_indices_compressed_bytes`, `secondary_indices_uncompressed_bytes`, `secondary_indices_marks_bytes` into `system.parts`. Closes [#29697](https://github.com/ClickHouse/ClickHouse/issues/29697). [#29896](https://github.com/ClickHouse/ClickHouse/pull/29896) ([Maksim Kita](https://github.com/kitaisreal)). +* User can now create dictionaries with comments: `CREATE DICTIONARY ... COMMENT 'value'` ... [#29899](https://github.com/ClickHouse/ClickHouse/pull/29899) ([Vasily Nemkov](https://github.com/Enmk)). +* Add support for FreeBSD aarch64. [#29952](https://github.com/ClickHouse/ClickHouse/pull/29952) ([MikaelUrankar](https://github.com/MikaelUrankar)). +* Added function `tokens`. It allows splitting a string into tokens using non-alphanumeric ASCII characters as separators. 
[#29981](https://github.com/ClickHouse/ClickHouse/pull/29981) ([Maksim Kita](https://github.com/kitaisreal)). +* Added new JSONExtractKeys function ... [#30056](https://github.com/ClickHouse/ClickHouse/pull/30056) ([Vitaly Orlov](https://github.com/orloffv)). +* Added `HashedArray` dictionary type. Closes [#30236](https://github.com/ClickHouse/ClickHouse/issues/30236). [#30242](https://github.com/ClickHouse/ClickHouse/pull/30242) ([Maksim Kita](https://github.com/kitaisreal)). +* Add ability to change nodes configuration (in `.xml` file) for ClickHouse Keeper. [#30372](https://github.com/ClickHouse/ClickHouse/pull/30372) ([alesapin](https://github.com/alesapin)). +* CompiledExpressionCache limit elements size using `compiled_expression_cache_elements_size` setting. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)). +* New function `mapExtractKeyLike` to get the map only kept elements matched specified pattern. [#30793](https://github.com/ClickHouse/ClickHouse/pull/30793) ([凌涛](https://github.com/lingtaolf)). +* Support `ALTER TABLE` for tables in `Memory` databases. [#30866](https://github.com/ClickHouse/ClickHouse/pull/30866) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Performance Improvement +* * Remove branchy code in filter operation with a better implementation with popcnt/ctz which have better performance. [#29881](https://github.com/ClickHouse/ClickHouse/pull/29881) ([Jun Jin](https://github.com/vesslanjin)). +* - To take advantage of X86_64 feature, Use AVX2/AVX512 instructions to accelerate filter operation. [#30014](https://github.com/ClickHouse/ClickHouse/pull/30014) ([jasperzhu](https://github.com/jinjunzh)). +* Improve performance of aggregation in order of primary key (with enabled setting `optimize_aggregation_in_order`). [#30266](https://github.com/ClickHouse/ClickHouse/pull/30266) ([Anton Popov](https://github.com/CurtizJ)). 
+* ColumnDecimal improve performance for filter operation. [#30431](https://github.com/ClickHouse/ClickHouse/pull/30431) ([Jun Jin](https://github.com/vesslanjin)). +* Dictionaries support read from multiple threads. [#30500](https://github.com/ClickHouse/ClickHouse/pull/30500) ([Maksim Kita](https://github.com/kitaisreal)). +* - Improve filter bitmask generator function all in one with sse/avx2/avx512 instructions. [#30670](https://github.com/ClickHouse/ClickHouse/pull/30670) ([jasperzhu](https://github.com/jinjunzh)). +* Queries with `INTO OUTFILE` in `clickhouse-client` will use multiple threads. Fix the issue with flickering progress-bar when using `INTO OUTFILE`. This closes [#30873](https://github.com/ClickHouse/ClickHouse/issues/30873). This closes [#30872](https://github.com/ClickHouse/ClickHouse/issues/30872). [#30886](https://github.com/ClickHouse/ClickHouse/pull/30886) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Allow to write number in query as binary literal. Example `SELECT 0b001;`. [#29304](https://github.com/ClickHouse/ClickHouse/pull/29304) ([Maksim Kita](https://github.com/kitaisreal)). +* Pass through initial query_id for clickhouse-benchmark (previously if you run remote query via `clickhouse-benchmark`, queries on shards will not be linked to the initial query via `initial_query_id`). [#29364](https://github.com/ClickHouse/ClickHouse/pull/29364) ([Azat Khuzhin](https://github.com/azat)). +* fix lost in memory part when freeze. [#29376](https://github.com/ClickHouse/ClickHouse/pull/29376) ([Mo Xuan](https://github.com/mo-avatar)). +* Added setting `use_skip_indexes`. [#29405](https://github.com/ClickHouse/ClickHouse/pull/29405) ([Maksim Kita](https://github.com/kitaisreal)). +* Apply config changes to `max_concurrent_queries` during runtime (no need to restart). [#29414](https://github.com/ClickHouse/ClickHouse/pull/29414) ([Raúl Marín](https://github.com/Algunenano)). 
+* Transform `isNull`/`isNotNull` to `IS NULL`/`IS NOT NULL` (for external dbs, i.e. MySQL). [#29446](https://github.com/ClickHouse/ClickHouse/pull/29446) ([Azat Khuzhin](https://github.com/azat)). +* Enable per-query memory profiler (set to memory_profiler_step=4MiB) globally. [#29455](https://github.com/ClickHouse/ClickHouse/pull/29455) ([Azat Khuzhin](https://github.com/azat)). +* Add support for `IS NULL`/`IS NOT NULL` for external dbs (i.e. MySQL). [#29463](https://github.com/ClickHouse/ClickHouse/pull/29463) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `There is no query or query context has expired` error in mutations with nested subqueries. Do not allow subqueries in mutation if table is replicated and `allow_nondeterministic_mutations` setting is disabled. [#29495](https://github.com/ClickHouse/ClickHouse/pull/29495) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of pointless restriction on projection name. Now projection name can start with `tmp_`. [#29520](https://github.com/ClickHouse/ClickHouse/pull/29520) ([Amos Bird](https://github.com/amosbird)). +* Reload dictionaries, models, user defined executable functions if servers config `dictionaries_config`, `models_config`, `user_defined_executable_functions_config` changes. Closes [#28142](https://github.com/ClickHouse/ClickHouse/issues/28142). [#29529](https://github.com/ClickHouse/ClickHouse/pull/29529) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow user to change log levels without restart. [#29586](https://github.com/ClickHouse/ClickHouse/pull/29586) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase `listen_backlog` by default (to match default in newer linux kernel). [#29643](https://github.com/ClickHouse/ClickHouse/pull/29643) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect nullable processing of JSONFunctions. This fixes [#29615](https://github.com/ClickHouse/ClickHouse/issues/29615) . 
Mark as improvement because https://github.com/ClickHouse/ClickHouse/pull/28012 is not released. [#29659](https://github.com/ClickHouse/ClickHouse/pull/29659) ([Amos Bird](https://github.com/amosbird)). +* Fixed the issue: `clickhouse-format --obfuscate` cannot process queries with embedded dictionaries (functions `regionTo...`). [#29667](https://github.com/ClickHouse/ClickHouse/pull/29667) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Query obfuscator `clickhouse-format --obfuscate` now works with more types of queries. [#29672](https://github.com/ClickHouse/ClickHouse/pull/29672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `url` table function to process multiple URLs in parallel. This closes [#29670](https://github.com/ClickHouse/ClickHouse/issues/29670) and closes [#29671](https://github.com/ClickHouse/ClickHouse/issues/29671). [#29673](https://github.com/ClickHouse/ClickHouse/pull/29673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ClickHouse logo to Play UI. [#29674](https://github.com/ClickHouse/ClickHouse/pull/29674) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Apply `max_untracked_memory`/`memory_profiler_step`/`memory_profiler_sample_probability` during mutate/merge. [#29681](https://github.com/ClickHouse/ClickHouse/pull/29681) ([Azat Khuzhin](https://github.com/azat)). +* Remove some redundant `seek` calls while reading compressed blocks in MergeTree table engines family. [#29766](https://github.com/ClickHouse/ClickHouse/pull/29766) ([alesapin](https://github.com/alesapin)). +* Web UI: render bars in table cells. [#29792](https://github.com/ClickHouse/ClickHouse/pull/29792) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added partitioned table prefix 'p' for the query for fetching replica identity index. [#29828](https://github.com/ClickHouse/ClickHouse/pull/29828) ([Shoh Jahon](https://github.com/Shohjahon)). 
+* Add ability to trace peak memory usage (with new trace_type - MemoryPeak). [#29858](https://github.com/ClickHouse/ClickHouse/pull/29858) ([Azat Khuzhin](https://github.com/azat)). +* Remove unused variable in s3cluster. [#29912](https://github.com/ClickHouse/ClickHouse/pull/29912) ([李扬](https://github.com/taiyang-li)). +* Add `shutdown_wait_unfinished_queries` server setting to allowing waiting for running queries up to `shutdown_wait_unfinished` time. This is for [#24451](https://github.com/ClickHouse/ClickHouse/issues/24451) . [#29914](https://github.com/ClickHouse/ClickHouse/pull/29914) ([Amos Bird](https://github.com/amosbird)). +* Now min-max aggregation over the first expression of primary key is optimized by projection. This is for https://github.com/ClickHouse/ClickHouse/issues/329. [#29918](https://github.com/ClickHouse/ClickHouse/pull/29918) ([Amos Bird](https://github.com/amosbird)). +* Add ability to configure retries and delays between them for `clickhouse-copier`. [#29921](https://github.com/ClickHouse/ClickHouse/pull/29921) ([Azat Khuzhin](https://github.com/azat)). +* Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better exception message while reading column from Arrow-supported formats like `Arrow`, `ArrowStream`, `Parquet` and `ORC`. This closes [#29926](https://github.com/ClickHouse/ClickHouse/issues/29926). [#29927](https://github.com/ClickHouse/ClickHouse/pull/29927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lock-order-inversion between periodic dictionary reload and config reload. [#29928](https://github.com/ClickHouse/ClickHouse/pull/29928) ([Azat Khuzhin](https://github.com/azat)). +* Fix `lock-order-inversion` between `DROP TABLE` for `DatabaseMemory` and `LiveView`. [#29929](https://github.com/ClickHouse/ClickHouse/pull/29929) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix data-race between flush() and startup() in StorageBuffer. [#29930](https://github.com/ClickHouse/ClickHouse/pull/29930) ([Azat Khuzhin](https://github.com/azat)). +* Now clickhouse use DNS cache while communicating with external S3. [#29999](https://github.com/ClickHouse/ClickHouse/pull/29999) ([alesapin](https://github.com/alesapin)). +* ProfileEvents::Counters snapshot doesn't store data in std::atomic anymore. [#30000](https://github.com/ClickHouse/ClickHouse/pull/30000) ([Dmitry Novik](https://github.com/novikd)). +* Add ability to print raw profile events to `clickhouse-client` (This can be useful for debugging and for testing). [#30064](https://github.com/ClickHouse/ClickHouse/pull/30064) ([Azat Khuzhin](https://github.com/azat)). +* Improve solution https://github.com/ClickHouse/ClickHouse/pull/28853 See also https://github.com/ClickHouse/ClickHouse/pull/29928. [#30084](https://github.com/ClickHouse/ClickHouse/pull/30084) ([Vitaly Baranov](https://github.com/vitlibar)). +* Reduce amount of redundant compressed data read from disk for some types `SELECT` queries (only for MergeTree engines family). [#30111](https://github.com/ClickHouse/ClickHouse/pull/30111) ([alesapin](https://github.com/alesapin)). +* PolygonDictionary added support for read method if setting `store_polygon_key_column` = true. Closes [#30090](https://github.com/ClickHouse/ClickHouse/issues/30090). [#30142](https://github.com/ClickHouse/ClickHouse/pull/30142) ([Maksim Kita](https://github.com/kitaisreal)). +* Now clickhouse-client supports native multi-line editing. [#30143](https://github.com/ClickHouse/ClickHouse/pull/30143) ([Amos Bird](https://github.com/amosbird)). +* Now `Keeper` (as part of `clickhouse-server`) will start asynchronously if it can connect to some other node. [#30170](https://github.com/ClickHouse/ClickHouse/pull/30170) ([alesapin](https://github.com/alesapin)). +* Support SQL user defined functions for clickhouse-local. 
[#30179](https://github.com/ClickHouse/ClickHouse/pull/30179) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to remove `SAMPLE BY` expression from `MergeTree` tables (`ALTER TABLE REMOVE SAMPLE BY`). [#30180](https://github.com/ClickHouse/ClickHouse/pull/30180) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed comparison of Date32 with Date, DateTime, DateTime64, String. [#30219](https://github.com/ClickHouse/ClickHouse/pull/30219) ([liang.huang](https://github.com/lhuang09287750)). +* Allow symlinks to files in user_files directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Minor improvements in replica cloning and enqueuing fetch for broken parts, that should avoid extremely rare hanging of `GET_PART` entries in replication queue. [#30346](https://github.com/ClickHouse/ClickHouse/pull/30346) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added an ability to use FINAL clause in SELECT queries from GraphiteMergeTree. [#30360](https://github.com/ClickHouse/ClickHouse/pull/30360) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow extract non-string element as string using JSONExtractString. This is for https://github.com/ClickHouse/ClickHouse/pull/25452#issuecomment-927123287. [#30426](https://github.com/ClickHouse/ClickHouse/pull/30426) ([Amos Bird](https://github.com/amosbird)). +* More full support of positional arguments. [#30433](https://github.com/ClickHouse/ClickHouse/pull/30433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* SQLUserDefinedFunctions support lambdas. Example `CREATE FUNCTION lambda_function AS x -> arrayMap(element -> element * 2, x);`. [#30435](https://github.com/ClickHouse/ClickHouse/pull/30435) ([Maksim Kita](https://github.com/kitaisreal)). +* SQLUserDefinedFunctions added DROP IF EXISTS support. Example `DROP FUNCTION IF EXISTS test_function`. 
[#30437](https://github.com/ClickHouse/ClickHouse/pull/30437) ([Maksim Kita](https://github.com/kitaisreal)). +* SQLUserDefinedFunctions support `CREATE OR REPLACE`, `CREATE IF NOT EXISTS` syntaxes. [#30454](https://github.com/ClickHouse/ClickHouse/pull/30454) ([Maksim Kita](https://github.com/kitaisreal)). +* Make query, which fetched table structure for PostgreSQL database because, more reliable. [#30477](https://github.com/ClickHouse/ClickHouse/pull/30477) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Get memory amount with Docker/cgroups limitations. See [#25662](https://github.com/ClickHouse/ClickHouse/issues/25662). [#30574](https://github.com/ClickHouse/ClickHouse/pull/30574) ([Pavel Medvedev](https://github.com/pmed)). +* SQLUserDefinedFunctions support ON CLUSTER. Example `CREATE FUNCTION test_function ON CLUSTER 'cluster' AS x -> x + 1;`. Closes [#30666](https://github.com/ClickHouse/ClickHouse/issues/30666). [#30734](https://github.com/ClickHouse/ClickHouse/pull/30734) ([Maksim Kita](https://github.com/kitaisreal)). +* Arrays of all serializable types are now supported by arrayStringConcat. [#30840](https://github.com/ClickHouse/ClickHouse/pull/30840) ([Nikita Taranov](https://github.com/nickitat)). +* Allow to parse values of `Date` data type in text formats as `YYYYMMDD` in addition to `YYYY-MM-DD`. This closes [#30870](https://github.com/ClickHouse/ClickHouse/issues/30870). [#30871](https://github.com/ClickHouse/ClickHouse/pull/30871) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename the columns of SessionLog: `session_id` -> `auth_id`, `session_name` -> `session_id`, `changed_settings` -> `settings`. [#30882](https://github.com/ClickHouse/ClickHouse/pull/30882) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix +* Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. 
This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix releasing query ID and session ID at the end of query processing while handling a gRPC call. This PR fixes flaky test [test_grpc_protocol/test.py::test_session](https://clickhouse-test-reports.s3.yandex.net/0/1ac03811a2df9717fa7c633d1af03def821d24b6/integration_tests_(memory).html). [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix reading from empty file on encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* N/A Detailed description / Documentation draft:. [#29367](https://github.com/ClickHouse/ClickHouse/pull/29367) ([kevin wan](https://github.com/MaxWk)). +* Add ability to compile using newer version of glibc w/o using new symbols. [#29594](https://github.com/ClickHouse/ClickHouse/pull/29594) ([Azat Khuzhin](https://github.com/azat)). +* Turning on experimental constexpr expressions evaluator for clang to speed up template code compilation. [#29668](https://github.com/ClickHouse/ClickHouse/pull/29668) ([Mike Kot](https://github.com/myrrc)). +* Fix a build error because of [Rename "common" to "base"](https://github.com/ClickHouse/ClickHouse/commit/fe6b7c77c7d6bd2a45a20f3b6bb4eb91da6177ff). [#29688](https://github.com/ClickHouse/ClickHouse/pull/29688) ([Sergei Semin](https://github.com/syominsergey)). +* Leave only required files in cross-compile toolchains. Include them as submodules (earlier they were downloaded as tarballs). [#29974](https://github.com/ClickHouse/ClickHouse/pull/29974) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add cross-build for PowerPC (powerpc64le). This closes [#9589](https://github.com/ClickHouse/ClickHouse/issues/9589). Enable support for interaction with MySQL for AArch64 and PowerPC. This closes [#26301](https://github.com/ClickHouse/ClickHouse/issues/26301). [#30010](https://github.com/ClickHouse/ClickHouse/pull/30010) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable Protobuf, Arrow, ORC, Parquet for AArch64 and Darwin builds. This closes [#29248](https://github.com/ClickHouse/ClickHouse/issues/29248). This closes [#28018](https://github.com/ClickHouse/ClickHouse/issues/28018). [#30015](https://github.com/ClickHouse/ClickHouse/pull/30015) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add tests for encrypted disk & encryption codec and ReplicatedMergeTree. [#30172](https://github.com/ClickHouse/ClickHouse/pull/30172) ([Vitaly Baranov](https://github.com/vitlibar)). +* ClickHouse can be statically built with Musl. This is added as experiment, it does not support building `odbc-bridge`, `library-bridge`, integration with CatBoost and some libraries. [#30248](https://github.com/ClickHouse/ClickHouse/pull/30248) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Recursive submodules are no longer needed for ClickHouse. [#30315](https://github.com/ClickHouse/ClickHouse/pull/30315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added docker image to build docs. [#30499](https://github.com/ClickHouse/ClickHouse/pull/30499) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix hanging DDL queries on Replicated database while adding a new replica. 
[#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Fix vertical merges of projection parts. This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253) . This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)). +* Remove window function `nth_value` as it is not memory-safe. This closes [#29347](https://github.com/ClickHouse/ClickHouse/issues/29347). [#29348](https://github.com/ClickHouse/ClickHouse/pull/29348) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix replicated access storage not shutting down cleanly when misconfigured. [#29388](https://github.com/ClickHouse/ClickHouse/pull/29388) ([Kevin Michel](https://github.com/kmichel-aiven)). +* rocksdb: fix race condition during multiple DB opening (and get back some tests that triggers the problem on CI). [#29393](https://github.com/ClickHouse/ClickHouse/pull/29393) ([Azat Khuzhin](https://github.com/azat)). +* Fix Logical error `Cannot capture columns` in functions greatest/least. Closes [#29334](https://github.com/ClickHouse/ClickHouse/issues/29334). [#29454](https://github.com/ClickHouse/ClickHouse/pull/29454) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible `Block structure mismatch` for subqueries with pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed incorrect table name parsing on loading of `Lazy` database. Fixes [#29456](https://github.com/ClickHouse/ClickHouse/issues/29456). [#29476](https://github.com/ClickHouse/ClickHouse/pull/29476) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* In ODBC bridge add retries for error Invalid cursor state. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in check `pathStartsWith` because there was a bug with the usage of `std::mismatch`: ` The behavior is undefined if the second range is shorter than the first range.`. [#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Avoid deadlocks when reading and writing on JOIN Engine tables at the same time. [#29544](https://github.com/ClickHouse/ClickHouse/pull/29544) ([Raúl Marín](https://github.com/Algunenano)). +* Fix null dereference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare segfault in `ALTER MODIFY` query when using incorrect table identifier in `DEFAULT` expression like `x.y.z...` Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)). +* Fix JIT expression compilation with aliases and short-circuit expression evaluation. Closes [#29403](https://github.com/ClickHouse/ClickHouse/issues/29403). [#29574](https://github.com/ClickHouse/ClickHouse/pull/29574) ([Maksim Kita](https://github.com/kitaisreal)). +* Condition in filter predicate could be lost after push-down optimisation. [#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed incorrect behaviour of setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529).
[#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix incorrect `GROUP BY` (multiple rows with the same keys in result) in case of distributed query when shards had mixed versions `<= 21.3` and `>= 21.4`, `GROUP BY` key had several columns all with fixed size, and two-level aggregation was activated (see `group_by_two_level_threshold` and `group_by_two_level_threshold_bytes`). Fixes [#29580](https://github.com/ClickHouse/ClickHouse/issues/29580). [#29735](https://github.com/ClickHouse/ClickHouse/pull/29735) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix concurrent access to `LowCardinality` during `GROUP BY` (leads to SIGSEGV). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)). +* Fix bad cast in `ATTACH TABLE ... FROM 'path'` query when non-string literal is used instead of path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in kernel. Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). [#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MaterializedMySQL: Fix an issue where if the connection to MySQL was lost, only parts of a transaction could be processed. [#29837](https://github.com/ClickHouse/ClickHouse/pull/29837) ([Håvard Kvålen](https://github.com/havardk)). +* Fix system tables recreation check (fails to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix potential resource leak of the concurrent query limit of merge tree tables introduced in https://github.com/ClickHouse/ClickHouse/pull/19544 . [#29879](https://github.com/ClickHouse/ClickHouse/pull/29879) ([Amos Bird](https://github.com/amosbird)). +* Fix data-race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible data-race between `FileChecker` and `StorageLog`/`StorageStripeLog`. [#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)). +* try to close issue: [#29965](https://github.com/ClickHouse/ClickHouse/issues/29965). [#29976](https://github.com/ClickHouse/ClickHouse/pull/29976) ([hexiaoting](https://github.com/hexiaoting)). +* Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)). +* Dropped `Memory` database might reappear after server restart, it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). Also added `force_remove_data_recursively_on_drop` setting as a workaround for `Directory not empty` error when dropping `Ordinary` database (because it's not possible to remove data leftovers manually in cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race between MOVE PARTITION and merges/mutations for MergeTree. [#30074](https://github.com/ClickHouse/ClickHouse/pull/30074) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `Port is already connected` for queries with `GLOBAL IN` and `WITH TOTALS`. Only for 21.9 and 21.10. [#30086](https://github.com/ClickHouse/ClickHouse/pull/30086) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Support nullable arguments in function `initializeAggregation`. [#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)). +* Fix INSERT SELECT incorrectly filling MATERIALIZED column based on Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)). +* Fix `pread_fake_async`/`pread_threadpool` with `min_bytes_to_use_direct_io`. [#30191](https://github.com/ClickHouse/ClickHouse/pull/30191) ([Azat Khuzhin](https://github.com/azat)). +* Fix reading from `MergeTree` with `max_read_buffer_size=0` (can lead to `Can't adjust last granule` `LOGICAL_ERROR`, or even data loss). [#30192](https://github.com/ClickHouse/ClickHouse/pull/30192) ([Azat Khuzhin](https://github.com/azat)). +* Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)). +* FlatDictionary, HashedDictionary fix bytes_allocated calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash with shortcircuit and lowcardinality in multiIf. [#30243](https://github.com/ClickHouse/ClickHouse/pull/30243) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ComplexKeyHashedDictionary, ComplexKeySparseHashedDictionary parsing `preallocate` option from layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix replaceRegexpAll bug. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)). +* Fix column alias resolution of JOIN queries when projection is enabled.
This fixes [#30146](https://github.com/ClickHouse/ClickHouse/issues/30146). [#30293](https://github.com/ClickHouse/ClickHouse/pull/30293) ([Amos Bird](https://github.com/amosbird)). +* Queries with condition like `IN (subquery)` could return incorrect result if an aggregate projection is applied. Fixed creation of sets for projections. [#30310](https://github.com/ClickHouse/ClickHouse/pull/30310) ([Amos Bird](https://github.com/amosbird)). +* Makes [#30162](https://github.com/ClickHouse/ClickHouse/issues/30162) less possible ... [#30370](https://github.com/ClickHouse/ClickHouse/pull/30370) ([Vasily Nemkov](https://github.com/Enmk)). +* Fixed segfault which might happen if session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix deadlock on ALTER with scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)). +* Add missing parenthesis for `isNotNull`/`isNull` rewrites to `IS [NOT] NULL` (fixes queries that have something like `isNotNull(1)+isNotNull(2)`). [#30520](https://github.com/ClickHouse/ClickHouse/pull/30520) ([Azat Khuzhin](https://github.com/azat)). +* Limit push down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438). [#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update aws-sdk submodule for throttling in Yandex.S3. [#30646](https://github.com/ClickHouse/ClickHouse/pull/30646) ([ianton-ru](https://github.com/ianton-ru)). +* Functions for case-insensitive search in UTF8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually do not match, it's fixed.
[#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix PREWHERE with WHERE in case of always true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a race condition between `REPLACE/MOVE PARTITION` and background merge in non-replicated `MergeTree` that might cause a part of moved/replaced data to remain in partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Clean temporary directory when localBackup failed for some reason. [#30797](https://github.com/ClickHouse/ClickHouse/pull/30797) ([ianton-ru](https://github.com/ianton-ru)). +* Fixed ambiguity when extracting auxiliary ZooKeeper name from ZooKeeper path in `ReplicatedMergeTree`. Previously server might fail to start with `Unknown auxiliary ZooKeeper name` if ZooKeeper path contains a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). Also it was allowed to specify ZooKeeper path that does not start with slash, but now it's deprecated and creation of new tables with such path is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed too. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix crash when projection with hashing function is materialized. This fixes [#30861](https://github.com/ClickHouse/ClickHouse/issues/30861). The issue is similar to https://github.com/ClickHouse/ClickHouse/pull/28560 which is a lack of proper understanding of the invariant of header's emptiness. [#30877](https://github.com/ClickHouse/ClickHouse/pull/30877) ([Amos Bird](https://github.com/amosbird)). +* Fix set index not used in AND/OR expressions when there are more than two operands.
This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416) . [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Generate test_results.tsv for fasttest'. [#29319](https://github.com/ClickHouse/ClickHouse/pull/29319) ([Dmitry Novik](https://github.com/novikd)). +* NO CL ENTRY: 'Revert "Add coroutines example."'. [#29829](https://github.com/ClickHouse/ClickHouse/pull/29829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Link FAQ from Debian installation'. [#29836](https://github.com/ClickHouse/ClickHouse/pull/29836) ([Federico Ceratto](https://github.com/FedericoCeratto)). +* NO CL ENTRY: 'add support of window function in antlr grammar'. [#30181](https://github.com/ClickHouse/ClickHouse/pull/30181) ([PHaroZ](https://github.com/PHaroZ)). +* NO CL ENTRY: 'Revert "Fix style regressions on benchmark page"'. [#30652](https://github.com/ClickHouse/ClickHouse/pull/30652) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Improve usability of `remote_url_allow_hosts`"'. [#30707](https://github.com/ClickHouse/ClickHouse/pull/30707) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Revert "Improve usability of `remote_url_allow_hosts`""'. [#30708](https://github.com/ClickHouse/ClickHouse/pull/30708) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'remove some unneeded header files'. [#30722](https://github.com/ClickHouse/ClickHouse/pull/30722) ([flynn](https://github.com/ucasfl)). + +#### Testing Improvement + +* Implemented structure-aware fuzzing approach in ClickHouse for select statement parser. [#30012](https://github.com/ClickHouse/ClickHouse/pull/30012) ([Paul](https://github.com/PaulCher)). 
+ diff --git a/docs/changelogs/v21.11.10.1-stable.md b/docs/changelogs/v21.11.10.1-stable.md new file mode 100644 index 00000000000..f07eccd30c5 --- /dev/null +++ b/docs/changelogs/v21.11.10.1-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v21.11.10.1-stable FIXME as compared to v21.11.9.1-stable + diff --git a/docs/changelogs/v21.11.11.1-stable.md b/docs/changelogs/v21.11.11.1-stable.md new file mode 100644 index 00000000000..e46f43c53e0 --- /dev/null +++ b/docs/changelogs/v21.11.11.1-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v21.11.11.1-stable FIXME as compared to v21.11.10.1-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#33656](https://github.com/ClickHouse/ClickHouse/issues/33656): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v21.11.2.2-stable.md b/docs/changelogs/v21.11.2.2-stable.md new file mode 100644 index 00000000000..f48f91a9b13 --- /dev/null +++ b/docs/changelogs/v21.11.2.2-stable.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.11.2.2-stable FIXME as compared to v21.11.1.8636-prestable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#31154](https://github.com/ClickHouse/ClickHouse/issues/31154): Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Backported in [#31027](https://github.com/ClickHouse/ClickHouse/issues/31027): Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). 
[#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31026](https://github.com/ClickHouse/ClickHouse/issues/31026): Fix JSONValue/Query with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v21.11.3.6-stable.md b/docs/changelogs/v21.11.3.6-stable.md new file mode 100644 index 00000000000..bf46ecec590 --- /dev/null +++ b/docs/changelogs/v21.11.3.6-stable.md @@ -0,0 +1,10 @@ +### ClickHouse release v21.11.3.6-stable FIXME as compared to v21.11.2.2-stable + +#### Bug Fix +* Backported in [#31246](https://github.com/ClickHouse/ClickHouse/issues/31246): Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#31206](https://github.com/ClickHouse/ClickHouse/issues/31206): Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31202](https://github.com/ClickHouse/ClickHouse/issues/31202): Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). 
+ diff --git a/docs/changelogs/v21.11.4.14-stable.md b/docs/changelogs/v21.11.4.14-stable.md new file mode 100644 index 00000000000..b3d44b8c193 --- /dev/null +++ b/docs/changelogs/v21.11.4.14-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.11.4.14-stable FIXME as compared to v21.11.3.6-stable + +#### Bug Fix +* Backported in [#31370](https://github.com/ClickHouse/ClickHouse/issues/31370): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#31282](https://github.com/ClickHouse/ClickHouse/issues/31282): Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31237](https://github.com/ClickHouse/ClickHouse/issues/31237): Fix bug which broke select queries if they happened after dropping materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31374](https://github.com/ClickHouse/ClickHouse/issues/31374): Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31236](https://github.com/ClickHouse/ClickHouse/issues/31236): Fix bug in Keeper which can lead to inability to start when some coordination logs was lost and we have more fresh snapshot than our latest log. 
[#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Backported in [#31435](https://github.com/ClickHouse/ClickHouse/issues/31435): Fix bug with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280#issuecomment-968696186). [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement (changelog entry is not required) + +* Backported in [#31283](https://github.com/ClickHouse/ClickHouse/issues/31283): Rename setting value `read_threadpool` to `threadpool` for setting `remote_filesystem_read_method`. [#31224](https://github.com/ClickHouse/ClickHouse/pull/31224) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v21.11.5.33-stable.md b/docs/changelogs/v21.11.5.33-stable.md new file mode 100644 index 00000000000..973c443d9f8 --- /dev/null +++ b/docs/changelogs/v21.11.5.33-stable.md @@ -0,0 +1,33 @@ +### ClickHouse release v21.11.5.33-stable FIXME as compared to v21.11.4.14-stable + +#### Performance Improvement +* Backported in [#31735](https://github.com/ClickHouse/ClickHouse/issues/31735): Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#31572](https://github.com/ClickHouse/ClickHouse/issues/31572): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#31517](https://github.com/ClickHouse/ClickHouse/issues/31517): Remove not like function into RPNElement.
[#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Backported in [#31551](https://github.com/ClickHouse/ClickHouse/issues/31551): Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#31580](https://github.com/ClickHouse/ClickHouse/issues/31580): * Disable `partial_merge_join_left_table_buffer_bytes` before bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009)). * Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#31600](https://github.com/ClickHouse/ClickHouse/issues/31600): Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#31593](https://github.com/ClickHouse/ClickHouse/issues/31593): All non-x86 builds were broken, because we don't have tests for them. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#31795](https://github.com/ClickHouse/ClickHouse/issues/31795): Fix sparkbars are not aligned, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)). 
+* Backported in [#31747](https://github.com/ClickHouse/ClickHouse/issues/31747): `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31791](https://github.com/ClickHouse/ClickHouse/issues/31791): Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31895](https://github.com/ClickHouse/ClickHouse/issues/31895): Fixed function ngrams when string contains utf8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)). +* Backported in [#31831](https://github.com/ClickHouse/ClickHouse/issues/31831): Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31814](https://github.com/ClickHouse/ClickHouse/issues/31814): Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32015](https://github.com/ClickHouse/ClickHouse/issues/32015): Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#32017](https://github.com/ClickHouse/ClickHouse/issues/32017): Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31760](https://github.com/ClickHouse/ClickHouse/issues/31760): Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#31891](https://github.com/ClickHouse/ClickHouse/issues/31891): Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32029](https://github.com/ClickHouse/ClickHouse/issues/32029): Fix invalid cast of nullable type when nullable primary key is used. This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#31920](https://github.com/ClickHouse/ClickHouse/issues/31920): Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#32076](https://github.com/ClickHouse/ClickHouse/issues/32076): Fix a bug about function transform with decimal args. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([Shuai li](https://github.com/loneylee)). 
+* Backported in [#31938](https://github.com/ClickHouse/ClickHouse/issues/31938): - Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler` - Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). +* Backported in [#31908](https://github.com/ClickHouse/ClickHouse/issues/31908): Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#32091](https://github.com/ClickHouse/ClickHouse/issues/32091): Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32020](https://github.com/ClickHouse/ClickHouse/issues/32020): Fix FileLog engine unnecessarily creating the metadata directory when table creation failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)). + diff --git a/docs/changelogs/v21.11.6.7-stable.md b/docs/changelogs/v21.11.6.7-stable.md new file mode 100644 index 00000000000..1f3df589466 --- /dev/null +++ b/docs/changelogs/v21.11.6.7-stable.md @@ -0,0 +1,24 @@ +### ClickHouse release v21.11.6.7-stable FIXME as compared to v21.11.5.33-stable + +#### Bug Fix +* Backported in [#32254](https://github.com/ClickHouse/ClickHouse/issues/32254): Fix skipping columns while writing protobuf.
This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#32345](https://github.com/ClickHouse/ClickHouse/issues/32345): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32152](https://github.com/ClickHouse/ClickHouse/issues/32152): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32298](https://github.com/ClickHouse/ClickHouse/issues/32298): Fix recursive user defined functions crash. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32148](https://github.com/ClickHouse/ClickHouse/issues/32148): Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32212](https://github.com/ClickHouse/ClickHouse/issues/32212): Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for i.e. `toUInt32OrDefault(toNullable(toUInt32(1)))`). 
[#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#32283](https://github.com/ClickHouse/ClickHouse/issues/32283): Dictionaries fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32482](https://github.com/ClickHouse/ClickHouse/issues/32482): Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32217](https://github.com/ClickHouse/ClickHouse/issues/32217): Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32311](https://github.com/ClickHouse/ClickHouse/issues/32311): XML dictionaries identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32354](https://github.com/ClickHouse/ClickHouse/issues/32354): Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32392](https://github.com/ClickHouse/ClickHouse/issues/32392): Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. 
[#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#32418](https://github.com/ClickHouse/ClickHouse/issues/32418): Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Manual backport of [#31766](https://github.com/ClickHouse/ClickHouse/issues/31766) into 21.11'. [#32202](https://github.com/ClickHouse/ClickHouse/pull/32202) ([Raúl Marín](https://github.com/Algunenano)). + diff --git a/docs/changelogs/v21.11.7.9-stable.md b/docs/changelogs/v21.11.7.9-stable.md new file mode 100644 index 00000000000..1d907ad0ce1 --- /dev/null +++ b/docs/changelogs/v21.11.7.9-stable.md @@ -0,0 +1,15 @@ +### ClickHouse release v21.11.7.9-stable FIXME as compared to v21.11.6.7-stable + +#### Bug Fix +* Backported in [#32691](https://github.com/ClickHouse/ClickHouse/issues/32691): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32711](https://github.com/ClickHouse/ClickHouse/issues/32711): Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#32568](https://github.com/ClickHouse/ClickHouse/issues/32568): Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#32732](https://github.com/ClickHouse/ClickHouse/issues/32732): Fix surprisingly bad code in function `file`. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release): + +* Backported in [#32617](https://github.com/ClickHouse/ClickHouse/issues/32617): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.11.8.4-stable.md b/docs/changelogs/v21.11.8.4-stable.md new file mode 100644 index 00000000000..0826b473758 --- /dev/null +++ b/docs/changelogs/v21.11.8.4-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.11.8.4-stable FIXME as compared to v21.11.7.9-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32679](https://github.com/ClickHouse/ClickHouse/issues/32679): Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#32543](https://github.com/ClickHouse/ClickHouse/issues/32543): Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. 
Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32769](https://github.com/ClickHouse/ClickHouse/issues/32769): Fix sparse_hashed dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#32635](https://github.com/ClickHouse/ClickHouse/issues/32635): Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#32633](https://github.com/ClickHouse/ClickHouse/issues/32633): Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#32891](https://github.com/ClickHouse/ClickHouse/issues/32891): Fix LOGICAL_ERROR when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#32792](https://github.com/ClickHouse/ClickHouse/issues/32792): Fix crash when `fuzzBits` is used with multiple identical FixedString values. Closes [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). 
+ diff --git a/docs/changelogs/v21.11.9.1-stable.md b/docs/changelogs/v21.11.9.1-stable.md new file mode 100644 index 00000000000..c1754614c3c --- /dev/null +++ b/docs/changelogs/v21.11.9.1-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v21.11.9.1-stable FIXME as compared to v21.11.8.4-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#33181](https://github.com/ClickHouse/ClickHouse/issues/33181): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v21.12.1.9017-prestable.md b/docs/changelogs/v21.12.1.9017-prestable.md new file mode 100644 index 00000000000..bcf5424fc63 --- /dev/null +++ b/docs/changelogs/v21.12.1.9017-prestable.md @@ -0,0 +1,206 @@ +### ClickHouse release v21.12.1.9017-prestable FIXME as compared to v21.11.1.8636-prestable + +#### Backward Incompatible Change +* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). +* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. 
Direct select will not be allowed even if enabled by setting in case there is an attached materialized view. For Kafka and RabbitMQ direct select if allowed, will not commit messages by default. To enable commits with direct select user must use storage level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). cc @filimonov. [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). +* A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from older version and some replica with old version is a leader, then server will fail to start after upgrade. Stop replicas with old version to make new version start. After that it will not be possible to downgrade to version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### New Feature +* Support for Stream Processing. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)). +* - Added CONSTRAINT ... ASSUME ... (without checking during INSERT) - Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization - Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later) - Added ability to replace heavy columns with light - Added ability to use the index in queries. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)). +* * Add Map combinator for `Map` type. * Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Support `JOIN ON 1 = 1` that have CROSS JOIN semantic, close [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578). 
[#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)). +* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)). +* This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)). +* We need to implement similar commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). +* Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)). +* Introduced window functions: - `exponentialTimeDecayedSum` - `exponentialTimeDecayedMax` - `exponentialTimeDecayedCount` - `exponentialTimeDecayedAvg` which are more effective than `exponentialMovingAverage` for bigger windows. Also more use-cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Support for PARTITION BY in File, URL, HDFS storages and with INSERT INTO table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)). +* support bool data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)). +* Exposes all GlobalThreadPool configurations to the configuration files. 
[#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)). +* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)). +* Allow to print/parse names and types of columns in CustomSeparated input/output format. Add formats CustomSeparatedWithNames/WithNamesAndTypes similar to TSVWithNames/WithNamesAndTypes. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)). +* - Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)). + +#### Performance Improvement +* ... Allow to split GraphiteMergeTree rollup rules for plain/tagged metrics (optional rule_type field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)). +* Fixing query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)). +* Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)). +* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speedup avg and sumCount aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)). 
+* Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)). +* Speedup query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)). +* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). Allow versioning of aggregate function states. [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)). +* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow to specify one or any number of PostgreSQL schemas for one MaterializedPostgreSQL database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* - Refactor formats TSV, TSVRaw, CSV and JSONCompactEachRow, JSONCompactStringsEachRow, remove code duplication, add base interface for formats with -WithNames and -WithNamesAndTypes suffixes. - Add formats CSVWithNamesAndTypes, TSVRawWithNames, TSVRawWithNamesAndTypes, JSONCompactEachRowWIthNames, JSONCompactStringsEachRowWIthNames, RowBinaryWithNames - Support parallel parsing for formats TSVWithNamesAndTypes, TSVRaw(WithNames/WIthNamesAndTypes), CSVWithNamesAndTypes, JSONCompactEachRow(WithNames/WIthNamesAndTypes), JSONCompactStringsEachRow(WithNames/WIthNamesAndTypes). - Support columns mapping and types checking for RowBinaryWithNamesAndTypes format. - Add setting `input_format_with_types_use_header` which specify if we should check that types written in WIthNamesAndTypes format matches with table structure. - Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should't control `csv_empty_as_default`). - Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields) - Fix Nullable input/output in TSVRaw format, make this format fully compatible with inserting into TSV. - Fix inserting NULLs in LowCardinality(Nullable) when `input_format_null_as_default` is enabled (previously default values was inserted instead of actual NULLs). - Fix strings deserialization in JSONStringsEachRow/JSONCompactStringsEachRow formats (strings were parsed just until first '\n' or '\t') - Add ability to use `Raw` escaping rule in Template input format. - Add diagnostic info for JSONCompactEachRow(WithNames/WIthNamesAndTypes) input format. 
- Fix bug with parallel parsing of -WithNames formats in case when setting min_chunk_bytes_for_parallel_parsing is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). +* Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)). +* Implement the commands BACKUP and RESTORE for the Log family. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails during removing znodes from zookeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)). +* For clickhouse-local or clickhouse-client if there is --interactive option with --query or --queries-file, then first execute them like in non-interactive and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)). +* added \l, \d, \c aliases like in MySQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)). +* Fix `--verbose` option in clickhouse-local interactive mode and allow logging into file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)). +* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. 
Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Only grab AlterLock when we do alter command. Let's see if the assumption is correct. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)). +* The local session inside a Clickhouse dictionary source won't send its events to the session log anymore. This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)). +* Cancel vertical merges when partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)). +* Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query, If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)). +* Function name normalization for ALTER queries. 
This helps avoid metadata mismatch between creating table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Mark as improvements as there are no bug reports and the scenario is somewhat rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)). +* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121) . [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)). +* Use DiskPtr instead of OS's file system API in class IDiskRemote in order to get more extensibility. Closes [#31117](https://github.com/ClickHouse/ClickHouse/issues/31117). [#31136](https://github.com/ClickHouse/ClickHouse/pull/31136) ([Yangkuan Liu](https://github.com/LiuYangkuan)). +* Now every replica will send to client only incremental information about profile events counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). +* - Syntax changed so now backup engine should be set explicitly: `BACKUP ... TO Disk('backups', 'path\')` - Changed the format of backup's metadata, now it's in XML - Backup of a whole database now works. [#31178](https://github.com/ClickHouse/ClickHouse/pull/31178) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from users settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Optimize function `mapContains` to reading of subcolumn `key` with enabled settings `optimize_functions_to_subcolumns`. 
[#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)). +* If some obsolete setting is changed show warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Optimize function `tupleElement` to reading of subcolumn with enabled setting `optimize_functions_to_subcolumns`. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)). +* Initial user's roles are used now to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)). +* Previously progress was shown only for `numbers` table function, not for `numbers_mt`. Now for `numbers_mt` it is also shown. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)). +* return fake create query when executing `show create table` on system's tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)). +* MaterializedMySQL now handles `CREATE TABLE ... LIKE ...` DDL queries. [#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)). +* Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Throw an exception if there is some garbage after field in JSONCompactStrings(EachRow) format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix waiting of the editor during interactive query edition (`waitpid()` returns -1 on `SIGWINCH` and `EDITOR` and `clickhouse-local`/`clickhouse-client` works concurrently). 
[#31456](https://github.com/ClickHouse/ClickHouse/pull/31456) ([Azat Khuzhin](https://github.com/azat)). +* Add `--pager` support for `clickhouse-local`. [#31457](https://github.com/ClickHouse/ClickHouse/pull/31457) ([Azat Khuzhin](https://github.com/azat)). +* Better analysis for `min/max/count` projection. Now, with enabled `allow_experimental_projection_optimization`, virtual `min/max/count` projection can be used together with columns from partition key. [#31474](https://github.com/ClickHouse/ClickHouse/pull/31474) ([Amos Bird](https://github.com/amosbird)). +* Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)). +* Add bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)). +* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Allow to use named collections configuration for kafka and rabbitmq engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)). +* ClickHouse dictionary source supports named connections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MaterializedMySQL: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)). +* Recreate system.*_log tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support PostgreSQL style ALTER MODIFY COLUMN. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)). +* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)). +* - Fix a bug that opentelemetry span log duration is zero at the query level if there's query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)). +* Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)). +* Allow a user configured `hdfs_replication` parameter for DiskHdfs and StorageHdfs. 
Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)). +* Allow to write `+` before Float32/Float64 values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)). +* - returns Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)). +* - Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)). +* Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339). [#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)). +* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve skipping unknown fields with Quoted escaping rule in Template/CustomSeparated formats. Previously we could skip only quoted strings, now we can skip values with any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)). +* Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)). +* - Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)). 
+* Support default expression for storage hdfs and optimize fetching when source is column oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)). + +#### Bug Fix +* Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)). +* Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). +* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). +* Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). + +#### Build/Testing/Packaging Improvement +* Hermetic builds: use fixed version of libc and make sure that no source or binary files from the host OS are using during build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). 
[#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system. [#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build snappy error in [#30790](https://github.com/ClickHouse/ClickHouse/issues/30790) Update of contrib/snappy is in https://github.com/google/snappy/pull/145/files. [#30796](https://github.com/ClickHouse/ClickHouse/pull/30796) ([李扬](https://github.com/taiyang-li)). +* Drop support for using Ordinary databases with MaterializedMySQL. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)). +* Initial support for risc-v. See development/build-cross-riscv for quirks and build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)). +* Remove hardcoded repository name from CI scripts. [#31536](https://github.com/ClickHouse/ClickHouse/pull/31536) ([Constantine Peresypkin](https://github.com/pkit)). +* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The script for uploading packages to the artifactory is added. [#31748](https://github.com/ClickHouse/ClickHouse/pull/31748) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Replaced default ports for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). 
[#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)). +* More correct setting up capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)). +* Revert changes from [#28016](https://github.com/ClickHouse/ClickHouse/issues/28016): archive.ubuntu.com should be faster in general than RU mirror. [#31822](https://github.com/ClickHouse/ClickHouse/pull/31822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove filesystem path to the build directory from binaries to enable reproducible builds. This is needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adjust artifactory pusher to new bucket paths; use only version or pull request number in bucket, no `0`; create a function to read github event data. [#31952](https://github.com/ClickHouse/ClickHouse/pull/31952) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Build rpm and tgz packages in master and release branches workflow. [#32048](https://github.com/ClickHouse/ClickHouse/pull/32048) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix broken symlink for sysroot/linux-riscv64/usr/lib. [#32071](https://github.com/ClickHouse/ClickHouse/pull/32071) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). 
[#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Fix bug which broke select queries if they happened after dropping materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix JSONValue/Query with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Rewrite right distributed table in local join. solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). 
[#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)). +* Fix bug in Keeper which can lead to inability to start when some coordination logs was lost and we have more fresh snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix bug with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix progress for short INSERT SELECT queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)). +* * Disable `partial_merge_join_left_table_buffer_bytes` before bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009)). * Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)). +* All non-x86 builds were broken, because we don't have tests for them. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix sparkbars are not aligned, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)). +* `RENAME TABLE` query worked incorrectly on attempt to rename an DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([Alexander Tokmakov](https://github.com/tavplubix)). +* * Fixed function ngrams when string contains utf8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)). +* Fix exception on some of the applications of `decrypt` function on Nullable columns. This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. 
[#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix disabling query profiler (In case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` query profiler can stayed enabled even after query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)). +* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Fix crash with empty result on odbc query. Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix recursive user defined functions crash. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). 
[#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix invalid cast of nullable type when nullable primary key is used. This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)). +* Fix a bug about function transform with decimal args. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([Shuai li](https://github.com/loneylee)). +* - Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler` - Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). +* Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). +* Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix FileLog engine unnesessary create meta data directory when create table failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)). 
+* MaterializedMySQL: Fix rare corruption of DECIMAL data. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)). +* Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix CREATE TABLE of Join Storage with multiply settings contains persistency. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)). +* Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for i.e. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)). +* Dictionaries fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). +* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* XML dictionaries identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix parsing error while NaN deserializing for `Nullable(Float)` for `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix window view parser. 
[#32232](https://github.com/ClickHouse/ClickHouse/pull/32232) ([vxider](https://github.com/Vxider)). +* Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if `Dictionary` table looks at XML-dictionary with the same name, it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315). [#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Build + +* support compile in arm machine with parameter "-DENABLE_TESTS=OFF". [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)). + +#### Improvement (changelog entry is not required) + +* Make remote_filesystem_read_method=threadpool by default. [#31291](https://github.com/ClickHouse/ClickHouse/pull/31291) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement (changelog entry is not required) + +* Rename setting value `read_threadpool` to `threadpool` for setting `remote_filesystem_read_method`. [#31224](https://github.com/ClickHouse/ClickHouse/pull/31224) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+ +#### NO CL ENTRY + +* NO CL ENTRY: 'Update permissions-for-queries.md of cn'. [#30902](https://github.com/ClickHouse/ClickHouse/pull/30902) ([Laurie Li](https://github.com/laurieliyang)). +* NO CL ENTRY: 'Make use of untuple alias for untupled columns names prefix'. [#30984](https://github.com/ClickHouse/ClickHouse/pull/30984) ([qieqieplus](https://github.com/qieqieplus)). +* NO CL ENTRY: 'Add banner block for index,company,careers pages'. [#31647](https://github.com/ClickHouse/ClickHouse/pull/31647) ([Tom Risse](https://github.com/flickerbox-tom)). +* NO CL ENTRY: 'Revert "Fixed null pointer exception in `MATERIALIZE COLUMN`"'. [#31692](https://github.com/ClickHouse/ClickHouse/pull/31692) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Check time limit sending data for global in.'. [#31805](https://github.com/ClickHouse/ClickHouse/pull/31805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Fix syntax error: drop comma'. [#32095](https://github.com/ClickHouse/ClickHouse/pull/32095) ([Federico Ceratto](https://github.com/FedericoCeratto)). +* NO CL ENTRY: 'Revert "Add a test with 20000 mutations in one query"'. [#32326](https://github.com/ClickHouse/ClickHouse/pull/32326) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Revert "Add a test with 20000 mutations in one query""'. [#32327](https://github.com/ClickHouse/ClickHouse/pull/32327) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.12.2.17-stable.md b/docs/changelogs/v21.12.2.17-stable.md new file mode 100644 index 00000000000..909bc7917c7 --- /dev/null +++ b/docs/changelogs/v21.12.2.17-stable.md @@ -0,0 +1,22 @@ +### ClickHouse release v21.12.2.17-stable FIXME as compared to v21.12.1.9017-prestable + +#### Bug Fix +* Backported in [#32693](https://github.com/ClickHouse/ClickHouse/issues/32693): Quota limit was not reached, but the limit was exceeded. 
This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32681](https://github.com/ClickHouse/ClickHouse/issues/32681): Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#32483](https://github.com/ClickHouse/ClickHouse/issues/32483): Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32542](https://github.com/ClickHouse/ClickHouse/issues/32542): Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32710](https://github.com/ClickHouse/ClickHouse/issues/32710): Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#32569](https://github.com/ClickHouse/ClickHouse/issues/32569): Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). 
[#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#32770](https://github.com/ClickHouse/ClickHouse/issues/32770): Fix sparse_hashed dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#32634](https://github.com/ClickHouse/ClickHouse/issues/32634): Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#32632](https://github.com/ClickHouse/ClickHouse/issues/32632): Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#32733](https://github.com/ClickHouse/ClickHouse/issues/32733): Fix surprisingly bad code in function `file`. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#32793](https://github.com/ClickHouse/ClickHouse/issues/32793): fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release): + +* Backported in [#32616](https://github.com/ClickHouse/ClickHouse/issues/32616): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ diff --git a/docs/changelogs/v21.12.3.32-stable.md b/docs/changelogs/v21.12.3.32-stable.md new file mode 100644 index 00000000000..3c08aae4cba --- /dev/null +++ b/docs/changelogs/v21.12.3.32-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.12.3.32-stable FIXME as compared to v21.12.2.17-stable + +#### Bug Fix +* Backported in [#33018](https://github.com/ClickHouse/ClickHouse/issues/33018): - Clickhouse Keeper handler should remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32890](https://github.com/ClickHouse/ClickHouse/issues/32890): Fix LOGICAL_ERROR when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#33183](https://github.com/ClickHouse/ClickHouse/issues/33183): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32904](https://github.com/ClickHouse/ClickHouse/issues/32904): `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#33047](https://github.com/ClickHouse/ClickHouse/issues/33047): Fix optimization with lazy seek for async reads from remote fs. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). 
[#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#32932](https://github.com/ClickHouse/ClickHouse/issues/32932): Close [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#32962](https://github.com/ClickHouse/ClickHouse/issues/32962): Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#33067](https://github.com/ClickHouse/ClickHouse/issues/33067): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#33100](https://github.com/ClickHouse/ClickHouse/issues/33100): Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded to the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#33123](https://github.com/ClickHouse/ClickHouse/issues/33123): Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ diff --git a/docs/changelogs/v21.12.4.1-stable.md b/docs/changelogs/v21.12.4.1-stable.md new file mode 100644 index 00000000000..7c028592876 --- /dev/null +++ b/docs/changelogs/v21.12.4.1-stable.md @@ -0,0 +1,13 @@ +### ClickHouse release v21.12.4.1-stable FIXME as compared to v21.12.3.32-stable + +#### Improvement +* Backported in [#33792](https://github.com/ClickHouse/ClickHouse/issues/33792): Create parent directories in DiskS3::restoreFileOperations method. [#33730](https://github.com/ClickHouse/ClickHouse/pull/33730) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix +* Backported in [#33551](https://github.com/ClickHouse/ClickHouse/issues/33551): Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#33537](https://github.com/ClickHouse/ClickHouse/issues/33537): Fix ORC stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Backported in [#33654](https://github.com/ClickHouse/ClickHouse/issues/33654): Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v21.2.1.5869-prestable.md b/docs/changelogs/v21.2.1.5869-prestable.md new file mode 100644 index 00000000000..43703bc13b9 --- /dev/null +++ b/docs/changelogs/v21.2.1.5869-prestable.md @@ -0,0 +1,156 @@ +### ClickHouse release v21.2.1.5869-prestable FIXME as compared to v21.1.1.5646-prestable + +#### Backward Incompatible Change +* - Fix memory tracking for `OPTIMIZE TABLE`/merges - Account query memory limits and sampling for `OPTIMIZE TABLE`/merges. 
[#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)). +* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)). +* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)). + +#### New Feature +* add support for zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)). +* - Added support of mapping LDAP group names, and attribute values in general, to local roles for users from ldap user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)). +* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading of whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)). +* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)). +* Disallow floating point column as partition key related to : [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421)#event-4147046255. [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)). 
+* Add function decodeXMLComponent to decode characters for XML. ``` SELECT decodeXMLComponent('Hello,"world"!'); ``` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)). +* Added PostgreSQL table engine (both select/insert, with support for multidimensional arrays), also as table function. Added PostgreSQL dictionary source. Added PostgreSQL database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `SELECT ALL` syntax. closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasfl)). +* Add three functions for map data type: 1. mapContains(map, key) to check whether map.keys include the second parameter key. 2. mapKeys(map) return all the keys in Array format 3. mapValues(map) return all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)). +* Support MetaKey+Enter hotkey binding in play ui. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)). +* Function formatDateTime support the %Q modification to format date to quarter. ... [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* ... [#19261](https://github.com/ClickHouse/ClickHouse/pull/19261) ([RegulusZ](https://github.com/RegulusZ)). +* Add factories' objects names, created during query, into system.query_log. Closes [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasfl)). 
+* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)). +* ... [#19764](https://github.com/ClickHouse/ClickHouse/pull/19764) ([emhlbmc](https://github.com/emhlbmc)). + +#### Performance Improvement +* Use a connection pool for S3 connections, controlled by the `s3_max_connections` settings. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Rewrite `sumIf()` and `sum(if())` function to `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasfl)). +* Update libcxx and use unstable ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Daniel Kutenin](https://github.com/danlark1)). +* Faster parts removal by lowering the number of `stat` syscalls. This returns the optimization that existed while ago. More safe interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up aggregate function `sum`. Improvement only visible on synthetic benchmarks and not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support splitting `Filter` step of query plan into `Expression + Filter` pair. Together with `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution for some expressions after `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Reduce lock contention for multiple layers of the Buffer engine. 
[#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)). +* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Added support for `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)). +* - Add optimize_alias_column_prediction (on by default), that will: * Respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes * Respect aliased columns in WHERE for trivial count queries for optimize_trivial_count * Respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)). +* Updated AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specify sharding key. Close [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasfl)). +* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)). +* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)). +* Allow CTE to be further aliased. 
Propagate CSE to subqueries in the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378) . This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235 . [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)). +* Add [UInt8, UInt16, UInt32, UInt64] arguments types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)). +* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)). +* - Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender) - Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)). +* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)). 
+* Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)). +* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). +* The exception when function `bar` is called with certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow change `max_server_memory_usage` without restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong alignment of values of `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow docker to be executed with arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)). +* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)). +* Improve MySQL compatibility. 
[#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)). +* Add `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)). +* Support using the new location of `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)). +* Enable function length/empty/notEmpty for datatype map, which returns keys number in map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([李扬](https://github.com/taiyang-li)). +* Support constant result in function `multiIf`. [#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)). +* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network. In my observations there is no performance difference or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Dictionary better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. 
[#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)). +* Insufficient arguments check in `positionCaseInsensitiveUTF8` function triggered address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)). +* In distributed queries if the setting `async_socket_for_remote` is enabled, it was possible to get stack overflow at least in debug build configuration if very deeply nested data type is used in table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces minor backward incompatibility: excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Table function `S3` will use global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Clickhouse client query param CTE added test. [#19762](https://github.com/ClickHouse/ClickHouse/pull/19762) ([Maksim Kita](https://github.com/kitaisreal)). +* Correctly output infinite arguments for `formatReadableTimeDelta` function. In previous versions, there was implicit conversion to implementation specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `S3` table function now supports `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). 
[#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Set charset to utf8mb4 when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* fix data type convert issue for mysql engine ... [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). +* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. 
Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* to fix [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894) Add a check to avoid exception when long column alias('table.column' style, usually auto-generated by BI tools like Looker) equals to long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)). +* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Fixed rare crashes when server run out of memory. [#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). 
[#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). 
[#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Fix bug in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). +* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* - Split RemoteQueryExecutorReadContext into module part - Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Do not mark file for distributed send as broken on EOF. [#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Added `cast`, `accurateCast`, `accurateCastOrNull` performance tests. [#19354](https://github.com/ClickHouse/ClickHouse/pull/19354) ([Maksim Kita](https://github.com/kitaisreal)). +* - Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([Vladimir C](https://github.com/vdimir)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. 
It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong deserialization of columns description. It makes INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. 
Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([Alexander Tokmakov](https://github.com/tavplubix)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). 
[#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). +* Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). +* Fix argMin/argMax crash when combining with -If. This fixes https://clickhouse-test-reports.s3.yandex.net/19800/7b8589dbde5bc621d1bcfd68a713e4684183f593/fuzzer_ubsan/report.html#fail1. [#19868](https://github.com/ClickHouse/ClickHouse/pull/19868) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)). +* Add integration tests run with memory sanitizer. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)). +* Add SQLancer test docker image to run check in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)). +* Added tests for defaults in URL and File engine. This closes [#5666](https://github.com/ClickHouse/ClickHouse/issues/5666). [#19015](https://github.com/ClickHouse/ClickHouse/pull/19015) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Enabling RBAC tests - Tests for SYSTEM privileges - Requirements name changes. [#19017](https://github.com/ClickHouse/ClickHouse/pull/19017) ([MyroTk](https://github.com/MyroTk)). +* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). 
[#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)). +* Avoid UBSan reports in `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential nullptr dereference in table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)). +* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to explicitly enable or disable watchdog via environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if server is not attached to terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Updating TestFlows AES encryption tests to support changes to the encrypt plaintext parameter. [#19674](https://github.com/ClickHouse/ClickHouse/pull/19674) ([vzakaznikov](https://github.com/vzakaznikov)). +* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. dicttoxml project is not active for 5+ years. 
[#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove --project-directory for docker-compose in integration test. Fix logs formatting from docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixed MemorySanitizer errors in cyrus-sasl and musl. [#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)). +* Add test for throwing an exception on inserting incorrect data in CollapsingMergeTree. [#19851](https://github.com/ClickHouse/ClickHouse/pull/19851) ([Kruglov Pavel](https://github.com/Avogar)). +* Adding retries for docker-compose start, stop and restart in TestFlows tests. [#19852](https://github.com/ClickHouse/ClickHouse/pull/19852) ([vzakaznikov](https://github.com/vzakaznikov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Remove useless codes'. [#19293](https://github.com/ClickHouse/ClickHouse/pull/19293) ([sundyli](https://github.com/sundy-li)). +* NO CL ENTRY: 'Merging [#19387](https://github.com/ClickHouse/ClickHouse/issues/19387)'. [#19683](https://github.com/ClickHouse/ClickHouse/pull/19683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.2.10.48-stable.md b/docs/changelogs/v21.2.10.48-stable.md new file mode 100644 index 00000000000..11eea960931 --- /dev/null +++ b/docs/changelogs/v21.2.10.48-stable.md @@ -0,0 +1,11 @@ +### ClickHouse release v21.2.10.48-stable FIXME as compared to v21.2.9.41-stable + +#### Improvement +* Backported in [#23015](https://github.com/ClickHouse/ClickHouse/issues/23015): Set `background_fetches_pool_size` to 8 that is better for production usage with frequent small insertions or slow ZooKeeper cluster. [#22945](https://github.com/ClickHouse/ClickHouse/pull/22945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#23080](https://github.com/ClickHouse/ClickHouse/issues/23080): Raised the threshold on max number of matches in result of the function `extractAllGroupsHorizontal`. [#23036](https://github.com/ClickHouse/ClickHouse/pull/23036) ([Vasily Nemkov](https://github.com/Enmk)). + +#### Bug Fix +* Backported in [#23155](https://github.com/ClickHouse/ClickHouse/issues/23155): Fixed a bug with unlimited wait for auxiliary AWS requests. [#22594](https://github.com/ClickHouse/ClickHouse/pull/22594) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#23032](https://github.com/ClickHouse/ClickHouse/issues/23032): Fix error `Cannot find column in ActionsDAG result` which may happen if subquery uses `untuple`. Fixes [#22290](https://github.com/ClickHouse/ClickHouse/issues/22290). [#22991](https://github.com/ClickHouse/ClickHouse/pull/22991) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23171](https://github.com/ClickHouse/ClickHouse/issues/23171): Some values were formatted with alignment in center in table cells in `Markdown` format. Not anymore. [#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.2.2.8-stable.md b/docs/changelogs/v21.2.2.8-stable.md new file mode 100644 index 00000000000..368243120f1 --- /dev/null +++ b/docs/changelogs/v21.2.2.8-stable.md @@ -0,0 +1,166 @@ +### ClickHouse release v21.2.2.8-stable FIXME as compared to v21.1.1.5646-prestable + +#### Backward Incompatible Change +* - Fix memory tracking for `OPTIMIZE TABLE`/merges - Account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)). +* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)). 
+* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)). + +#### New Feature +* add support for zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)). +* - Added support of mapping LDAP group names, and attribute values in general, to local roles for users from ldap user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)). +* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading of whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)). +* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)). +* Disallow floating point column as partition key related to : [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421)#event-4147046255. [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)). +* Add function decodeXMLComponent to decode characters for XML. ``` SELECT decodeXMLComponent('Hello,"world"!'); ``` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). 
[#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)). +* Added PostgreSQL table engine (both select/insert, with support for multidimensional arrays), also as table function. Added PostgreSQL dictionary source. Added PostgreSQL database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `SELECT ALL` syntax. closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasfl)). +* Add three functions for map data type: 1. mapContains(map, key) to check whether map.keys include the second parameter key. 2. mapKeys(map) return all the keys in Array format 3. mapValues(map) return all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)). +* Support MetaKey+Enter hotkey binding in play ui. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)). +* Function formatDateTime support the %Q modification to format date to quarter. ... [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* ... [#19261](https://github.com/ClickHouse/ClickHouse/pull/19261) ([RegulusZ](https://github.com/RegulusZ)). +* Add factories' objects names, created during query, into system.query_log. Closes [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasfl)). +* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. 
[#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)). +* ... [#19764](https://github.com/ClickHouse/ClickHouse/pull/19764) ([emhlbmc](https://github.com/emhlbmc)). + +#### Performance Improvement +* Use a connection pool for S3 connections, controlled by the `s3_max_connections` settings. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Rewrite `sumIf()` and `sum(if())` function to `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasfl)). +* Update libcxx and use unstable ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Daniel Kutenin](https://github.com/danlark1)). +* Faster parts removal by lowering the number of `stat` syscalls. This returns the optimization that existed while ago. More safe interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up aggregate function `sum`. Improvement only visible on synthetic benchmarks and not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support splitting `Filter` step of query plan into `Expression + Filter` pair. Together with `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution for some expressions after `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Reduce lock contention for multiple layers of the Buffer engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)). 
+* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Added support for `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)). +* - Add optimize_alias_column_prediction (on by default), that will: * Respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes * Respect aliased columns in WHERE for trivial count queries for optimize_trivial_count * Respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)). +* Updated AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specifying sharding key. Close [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasfl)). +* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)). +* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)). +* Allow CTE to be further aliased. Propagate CSE to subqueries in the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378) . 
This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235 . [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)). +* Add [UInt8, UInt16, UInt32, UInt64] arguments types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)). +* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)). +* - Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender) - Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)). +* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)). +* Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)). 
+* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). +* The exception when function `bar` is called with certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow change `max_server_memory_usage` without restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong alignment of values of `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow docker to be executed with arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)). +* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)). +* Improve MySQL compatibility. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)). +* Add `http_referer` field to `system.query_log`, `system.processes`, etc. 
This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)). +* Support using the new location of `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)). +* Enable function length/empty/notEmpty for datatype map, which returns keys number in map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([李扬](https://github.com/taiyang-li)). +* Support constant result in function `multiIf`. [#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)). +* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network. In my observations there is no performance difference or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Dictionary better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)). 
+* Insufficient arguments check in `positionCaseInsensitiveUTF8` function triggered address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)). +* In distributed queries if the setting `async_socket_for_remote` is enabled, it was possible to get stack overflow at least in debug build configuration if very deeply nested data type is used in table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces minor backward incompatibility: excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Table function `S3` will use global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Clickhouse client query param CTE added test. [#19762](https://github.com/ClickHouse/ClickHouse/pull/19762) ([Maksim Kita](https://github.com/kitaisreal)). +* Correctly output infinite arguments for `formatReadableTimeDelta` function. In previous versions, there was implicit conversion to implementation specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `S3` table function now supports `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)). 
+* Set charset to utf8mb4 when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* fix data type convert issue for mysql engine ... [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). +* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). 
[#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* to fix [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894) Add a check to avoid exception when long column alias('table.column' style, usually auto-generated by BI tools like Looker) equals to long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)). +* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Fixed rare crashes when server run out of memory. [#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). 
[#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). 
[#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Fix bug in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). +* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* - Split RemoteQueryExecutorReadContext into module part - Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Do not mark file for distributed send as broken on EOF. [#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Added `cast`, `accurateCast`, `accurateCastOrNull` performance tests. [#19354](https://github.com/ClickHouse/ClickHouse/pull/19354) ([Maksim Kita](https://github.com/kitaisreal)). +* - Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([Vladimir C](https://github.com/vdimir)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. 
It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong deserialization of columns description. It makes INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. 
Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19986](https://github.com/ClickHouse/ClickHouse/issues/19986): Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([Alexander Tokmakov](https://github.com/tavplubix)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). 
[#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19922](https://github.com/ClickHouse/ClickHouse/issues/19922): Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([李扬](https://github.com/taiyang-li)). +* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#20007](https://github.com/ClickHouse/ClickHouse/issues/20007): Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). +* Fix argMin/argMax crash when combining with -If. This fixes https://clickhouse-test-reports.s3.yandex.net/19800/7b8589dbde5bc621d1bcfd68a713e4684183f593/fuzzer_ubsan/report.html#fail1. [#19868](https://github.com/ClickHouse/ClickHouse/pull/19868) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#19939](https://github.com/ClickHouse/ClickHouse/issues/19939): Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). 
[#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19935](https://github.com/ClickHouse/ClickHouse/issues/19935): BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19996](https://github.com/ClickHouse/ClickHouse/issues/19996): - Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). +* Backported in [#20112](https://github.com/ClickHouse/ClickHouse/issues/20112): `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20028](https://github.com/ClickHouse/ClickHouse/issues/20028): Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). +* Backported in [#20123](https://github.com/ClickHouse/ClickHouse/issues/20123): MaterializeMySQL: Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). +* Backported in [#20146](https://github.com/ClickHouse/ClickHouse/issues/20146): Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). 
[#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement +* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)). +* Add integration tests run with memory sanitizer. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)). +* Add SQLancer test docker image to run check in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)). +* Added tests for defaults in URL and File engine. This closes [#5666](https://github.com/ClickHouse/ClickHouse/issues/5666). [#19015](https://github.com/ClickHouse/ClickHouse/pull/19015) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Enabling RBAC tests - Tests for SYSTEM privileges - Requirements name changes. [#19017](https://github.com/ClickHouse/ClickHouse/pull/19017) ([MyroTk](https://github.com/MyroTk)). +* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)). +* Avoid UBSan reports in `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential nullptr dereference in table function `VALUES`. 
[#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)). +* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to explicitly enable or disable watchdog via environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if server is not attached to terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Updating TestFlows AES encryption tests to support changes to the encrypt plaintext parameter. [#19674](https://github.com/ClickHouse/ClickHouse/pull/19674) ([vzakaznikov](https://github.com/vzakaznikov)). +* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. dicttoxml project is not active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove --project-directory for docker-compose in integration test. Fix logs formatting from docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixed MemorySanitizer errors in cyrus-sasl and musl. [#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)). +* Add test for throwing an exception on inserting incorrect data in CollapsingMergeTree. [#19851](https://github.com/ClickHouse/ClickHouse/pull/19851) ([Kruglov Pavel](https://github.com/Avogar)). +* Adding retries for docker-compose start, stop and restart in TestFlows tests. 
[#19852](https://github.com/ClickHouse/ClickHouse/pull/19852) ([vzakaznikov](https://github.com/vzakaznikov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Remove useless codes'. [#19293](https://github.com/ClickHouse/ClickHouse/pull/19293) ([sundyli](https://github.com/sundy-li)). +* NO CL ENTRY: 'Merging [#19387](https://github.com/ClickHouse/ClickHouse/issues/19387)'. [#19683](https://github.com/ClickHouse/ClickHouse/pull/19683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.2.3.15-stable.md b/docs/changelogs/v21.2.3.15-stable.md new file mode 100644 index 00000000000..26653e780bb --- /dev/null +++ b/docs/changelogs/v21.2.3.15-stable.md @@ -0,0 +1,19 @@ +### ClickHouse release v21.2.3.15-stable FIXME as compared to v21.2.2.8-stable + +#### Bug Fix +* Backported in [#20241](https://github.com/ClickHouse/ClickHouse/issues/20241): Fix a bug that moving pieces to destination table may failed in case of launching multiple clickhouse-copiers. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)). +* Backported in [#20296](https://github.com/ClickHouse/ClickHouse/issues/20296): * Bugfix in StorageJoin. [#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#20389](https://github.com/ClickHouse/ClickHouse/issues/20389): The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#20270](https://github.com/ClickHouse/ClickHouse/issues/20270): Fix CTE when using in INSERT SELECT. This fixes [#20187](https://github.com/ClickHouse/ClickHouse/issues/20187), fixes [#20195](https://github.com/ClickHouse/ClickHouse/issues/20195). 
[#20211](https://github.com/ClickHouse/ClickHouse/pull/20211) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20329](https://github.com/ClickHouse/ClickHouse/issues/20329): Fix rare server crash on config reload during the shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)). +* Backported in [#20324](https://github.com/ClickHouse/ClickHouse/issues/20324): Fix exception during vertical merge for `MergeTree` table engines family which don't allow to perform vertical merges. Fixes [#20259](https://github.com/ClickHouse/ClickHouse/issues/20259). [#20279](https://github.com/ClickHouse/ClickHouse/pull/20279) ([alesapin](https://github.com/alesapin)). +* Backported in [#20333](https://github.com/ClickHouse/ClickHouse/issues/20333): Restrict to `DROP` or `RENAME` version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). +* Backported in [#20365](https://github.com/ClickHouse/ClickHouse/issues/20365): Fix too often retries of failed background tasks for `ReplicatedMergeTree` table engines family. This could lead to too verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)). +* Backported in [#20379](https://github.com/ClickHouse/ClickHouse/issues/20379): Fix incorrect result of binary operations between two constant decimals of different scale. Fixes [#20283](https://github.com/ClickHouse/ClickHouse/issues/20283). [#20339](https://github.com/ClickHouse/ClickHouse/pull/20339) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#20376](https://github.com/ClickHouse/ClickHouse/issues/20376): Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20361](https://github.com/ClickHouse/ClickHouse/issues/20361): Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#20224](https://github.com/ClickHouse/ClickHouse/issues/20224) to 21.2: Fix access control manager destruction order"'. [#20397](https://github.com/ClickHouse/ClickHouse/pull/20397) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v21.2.4.6-stable.md b/docs/changelogs/v21.2.4.6-stable.md new file mode 100644 index 00000000000..1605186701d --- /dev/null +++ b/docs/changelogs/v21.2.4.6-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.2.4.6-stable FIXME as compared to v21.2.3.15-stable + +#### Bug Fix +* Backported in [#20510](https://github.com/ClickHouse/ClickHouse/issues/20510): Fixed the behavior when in case of broken JSON we tried to read the whole file into memory which leads to exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#20575](https://github.com/ClickHouse/ClickHouse/issues/20575): Check if table function `view` is used in expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20486](https://github.com/ClickHouse/ClickHouse/issues/20486): Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. 
[#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20535](https://github.com/ClickHouse/ClickHouse/issues/20535): Fix infinite loop when propagating WITH aliases to subqueries. This fixes [#20388](https://github.com/ClickHouse/ClickHouse/issues/20388). [#20476](https://github.com/ClickHouse/ClickHouse/pull/20476) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20615](https://github.com/ClickHouse/ClickHouse/issues/20615): Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV). [#20498](https://github.com/ClickHouse/ClickHouse/pull/20498) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20887](https://github.com/ClickHouse/ClickHouse/issues/20887): Fix subquery with union distinct and limit clause. close [#20597](https://github.com/ClickHouse/ClickHouse/issues/20597). [#20610](https://github.com/ClickHouse/ClickHouse/pull/20610) ([flynn](https://github.com/ucasfl)). +* Backported in [#20993](https://github.com/ClickHouse/ClickHouse/issues/20993): Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#20987](https://github.com/ClickHouse/ClickHouse/issues/20987): `USE database;` query did not work when using MySQL 5.7 client to connect to ClickHouse server, it's fixed. Fixes [#18926](https://github.com/ClickHouse/ClickHouse/issues/18926). [#20878](https://github.com/ClickHouse/ClickHouse/pull/20878) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ diff --git a/docs/changelogs/v21.2.5.5-stable.md b/docs/changelogs/v21.2.5.5-stable.md new file mode 100644 index 00000000000..b5275e89519 --- /dev/null +++ b/docs/changelogs/v21.2.5.5-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.2.5.5-stable FIXME as compared to v21.2.4.6-stable + +#### Bug Fix +* Backported in [#20574](https://github.com/ClickHouse/ClickHouse/issues/20574): Fix crash which could happen if unknown packet was received from remote query (was introduced in [#17868](https://github.com/ClickHouse/ClickHouse/issues/17868)). [#20547](https://github.com/ClickHouse/ClickHouse/pull/20547) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21006](https://github.com/ClickHouse/ClickHouse/issues/21006): Fix 'Empty task was returned from async task queue' on query cancellation. [#20881](https://github.com/ClickHouse/ClickHouse/pull/20881) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21132](https://github.com/ClickHouse/ClickHouse/issues/21132): Fixed behaviour, when `ALTER MODIFY COLUMN` created mutation, that will knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#21251](https://github.com/ClickHouse/ClickHouse/issues/21251): - Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#21069](https://github.com/ClickHouse/ClickHouse/issues/21069): Out of bound memory access was possible when formatting specifically crafted out of range value of type `DateTime64`. This closes [#20494](https://github.com/ClickHouse/ClickHouse/issues/20494). This closes [#20543](https://github.com/ClickHouse/ClickHouse/issues/20543). [#21023](https://github.com/ClickHouse/ClickHouse/pull/21023) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#21160](https://github.com/ClickHouse/ClickHouse/issues/21160): Fix `input_format_null_as_default` take effective when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116) . [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#21228](https://github.com/ClickHouse/ClickHouse/issues/21228): Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed bug that could cause duplicates with insert query (if one of the callbacks came a little too late). [#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#21276](https://github.com/ClickHouse/ClickHouse/issues/21276): Fix bug with `join_use_nulls` and joining `TOTALS` from subqueries. This closes [#19362](https://github.com/ClickHouse/ClickHouse/issues/19362) and [#21137](https://github.com/ClickHouse/ClickHouse/issues/21137). [#21248](https://github.com/ClickHouse/ClickHouse/pull/21248) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.2.6.1-stable.md b/docs/changelogs/v21.2.6.1-stable.md new file mode 100644 index 00000000000..1f28c14c485 --- /dev/null +++ b/docs/changelogs/v21.2.6.1-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.2.6.1-stable FIXME as compared to v21.2.5.5-stable + +#### Bug Fix +* Backported in [#21352](https://github.com/ClickHouse/ClickHouse/issues/21352): Fix the number of threads for scalar subqueries and subqueries for index (after [#19007](https://github.com/ClickHouse/ClickHouse/issues/19007) single thread was always used). Fixes [#20457](https://github.com/ClickHouse/ClickHouse/issues/20457), [#20512](https://github.com/ClickHouse/ClickHouse/issues/20512). [#20550](https://github.com/ClickHouse/ClickHouse/pull/20550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#21262](https://github.com/ClickHouse/ClickHouse/issues/21262): fix default_replica_path and default_replica_name values are useless on Replicated(*)MergeTree engine when the engine needs specify other parameters. [#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)). +* Backported in [#21156](https://github.com/ClickHouse/ClickHouse/issues/21156): fix bug related to cast tuple to map. Closes [#21029](https://github.com/ClickHouse/ClickHouse/issues/21029). [#21120](https://github.com/ClickHouse/ClickHouse/pull/21120) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#21234](https://github.com/ClickHouse/ClickHouse/issues/21234): Now mutations allowed only for table engines that support them (MergeTree family, Memory, MaterializedView). Other engines will report a more clear error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)). +* Backported in [#21427](https://github.com/ClickHouse/ClickHouse/issues/21427): Fix crash in `EXPLAIN` for query with `UNION`. Fixes [#20876](https://github.com/ClickHouse/ClickHouse/issues/20876), [#21170](https://github.com/ClickHouse/ClickHouse/issues/21170). [#21246](https://github.com/ClickHouse/ClickHouse/pull/21246) ([flynn](https://github.com/ucasfl)). +* Backported in [#21301](https://github.com/ClickHouse/ClickHouse/issues/21301): Fix redundant reconnects to ZooKeeper and the possibility of two active sessions for a single clickhouse server. Both problems introduced in [#14678](https://github.com/ClickHouse/ClickHouse/issues/14678). [#21264](https://github.com/ClickHouse/ClickHouse/pull/21264) ([alesapin](https://github.com/alesapin)). +* Backported in [#21553](https://github.com/ClickHouse/ClickHouse/issues/21553): Now `ALTER MODIFY COLUMN` queries will correctly affect changes in partition key, skip indices, TTLs, and so on. 
Fixes [#13675](https://github.com/ClickHouse/ClickHouse/issues/13675). [#21334](https://github.com/ClickHouse/ClickHouse/pull/21334) ([alesapin](https://github.com/alesapin)). +* Backported in [#21380](https://github.com/ClickHouse/ClickHouse/issues/21380): Fix error `Bad cast from type ... to DB::ColumnLowCardinality` while inserting into table with `LowCardinality` column from `Values` format. Fixes [#21140](https://github.com/ClickHouse/ClickHouse/issues/21140). [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.2.7.11-stable.md b/docs/changelogs/v21.2.7.11-stable.md new file mode 100644 index 00000000000..1f0f94ee0bf --- /dev/null +++ b/docs/changelogs/v21.2.7.11-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.2.7.11-stable FIXME as compared to v21.2.6.1-stable + +#### Bug Fix +* Backported in [#21206](https://github.com/ClickHouse/ClickHouse/issues/21206): Fix the metadata leak when the Replicated*MergeTree with custom (non default) ZooKeeper cluster is dropped. [#21119](https://github.com/ClickHouse/ClickHouse/pull/21119) ([fastio](https://github.com/fastio)). +* Backported in [#21927](https://github.com/ClickHouse/ClickHouse/issues/21927): Fix Avro format parsing for Kafka. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). +* Backported in [#21855](https://github.com/ClickHouse/ClickHouse/issues/21855): Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21688](https://github.com/ClickHouse/ClickHouse/issues/21688): Fix fsync_part_directory for horizontal merge. 
[#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21795](https://github.com/ClickHouse/ClickHouse/issues/21795): Fix distributed requests cancellation (for example simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21879](https://github.com/ClickHouse/ClickHouse/issues/21879): Fix possible crashes in aggregate functions with combinator Distinct, while using two-level aggregation. This is a follow-up fix of https://github.com/ClickHouse/ClickHouse/pull/18365 . Can only be reproduced in production env. No test case available yet. cc @CurtizJ. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#21979](https://github.com/ClickHouse/ClickHouse/issues/21979): Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#22140](https://github.com/ClickHouse/ClickHouse/issues/22140): The function `decrypt` was lacking a check for the minimal size of data encrypted in AEAD mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+ diff --git a/docs/changelogs/v21.2.8.31-stable.md b/docs/changelogs/v21.2.8.31-stable.md new file mode 100644 index 00000000000..884dcb5a649 --- /dev/null +++ b/docs/changelogs/v21.2.8.31-stable.md @@ -0,0 +1,30 @@ +### ClickHouse release v21.2.8.31-stable FIXME as compared to v21.2.7.11-stable + +#### Bug Fix +* Backported in [#22191](https://github.com/ClickHouse/ClickHouse/issues/22191): Fixed race on SSL object inside SecureSocket in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#22336](https://github.com/ClickHouse/ClickHouse/issues/22336): Fix table function `clusterAllReplicas` returns wrong `_shard_num`. close [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasfl)). +* Backported in [#22092](https://github.com/ClickHouse/ClickHouse/issues/22092): In case if query has constant `WHERE` condition, and setting `optimize_skip_unused_shards` enabled, all shards may be skipped and query could return incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22318](https://github.com/ClickHouse/ClickHouse/issues/22318): Remove unknown columns from joined table in where for queries to external database engines (MySQL, PostgreSQL). close [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), close [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), close [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22285](https://github.com/ClickHouse/ClickHouse/issues/22285): Start accepting connections after DDLWorker and dictionaries initialization. 
[#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21753](https://github.com/ClickHouse/ClickHouse/issues/21753): Fix concurrent `OPTIMIZE` and `DROP` for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21809](https://github.com/ClickHouse/ClickHouse/issues/21809): Fix bug for ReplicatedMerge table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Backported in [#22188](https://github.com/ClickHouse/ClickHouse/issues/22188): Reverted S3 connection pools. [#21737](https://github.com/ClickHouse/ClickHouse/pull/21737) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#22240](https://github.com/ClickHouse/ClickHouse/issues/22240): Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717) , which was introduced in https://github.com/ClickHouse/ClickHouse/pull/18896 . [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22050](https://github.com/ClickHouse/ClickHouse/issues/22050): Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#22464](https://github.com/ClickHouse/ClickHouse/issues/22464): In rare case, merge for `CollapsingMergeTree` may create granule with `index_granularity + 1` rows. 
Because of this, internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#22204](https://github.com/ClickHouse/ClickHouse/issues/22204): Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). +* Backported in [#22278](https://github.com/ClickHouse/ClickHouse/issues/22278): Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). +* Backported in [#22266](https://github.com/ClickHouse/ClickHouse/issues/22266): Fix the background thread pool name. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)). +* Backported in [#22315](https://github.com/ClickHouse/ClickHouse/issues/22315): Fix docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)). +* Backported in [#22502](https://github.com/ClickHouse/ClickHouse/issues/22502): Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22369](https://github.com/ClickHouse/ClickHouse/issues/22369): Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. 
Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). +* Backported in [#22530](https://github.com/ClickHouse/ClickHouse/issues/22530): Buffer overflow (on read) was possible in `tokenbf_v1` full text index. The excessive bytes are not used but the read operation may lead to crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22461](https://github.com/ClickHouse/ClickHouse/issues/22461): Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the resultset contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22557](https://github.com/ClickHouse/ClickHouse/issues/22557): Fix bug in partial merge join with `LowCardinality`. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22606](https://github.com/ClickHouse/ClickHouse/issues/22606): Fix deserialization of empty string without newline at end of TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without version update: set `input_format_null_as_default` to zero. It was zero in old versions. [#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#22575](https://github.com/ClickHouse/ClickHouse/issues/22575): Fix UB by unlocking the rwlock of the TinyLog from the same thread. [#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22648](https://github.com/ClickHouse/ClickHouse/issues/22648): Avoid UB in *Log engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22682](https://github.com/ClickHouse/ClickHouse/issues/22682): Fix LOGICAL_ERROR for Log with nested types w/o columns in the SELECT clause. [#22654](https://github.com/ClickHouse/ClickHouse/pull/22654) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22699](https://github.com/ClickHouse/ClickHouse/issues/22699): Fix wait for mutations on several replicas for ReplicatedMergeTree table engines. Previously, mutation/alter query may finish before mutation actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)). +* Backported in [#22740](https://github.com/ClickHouse/ClickHouse/issues/22740): Fix possible hangs in zk requests in case of OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.2.9.41-stable.md b/docs/changelogs/v21.2.9.41-stable.md new file mode 100644 index 00000000000..ab4303aaa2a --- /dev/null +++ b/docs/changelogs/v21.2.9.41-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.2.9.41-stable FIXME as compared to v21.2.8.31-stable + +#### Improvement +* Backported in [#22818](https://github.com/ClickHouse/ClickHouse/issues/22818): Make FQDN and other DNS related functions work correctly in alpine images. 
[#20336](https://github.com/ClickHouse/ClickHouse/pull/20336) ([filimonov](https://github.com/filimonov)). +* Backported in [#22812](https://github.com/ClickHouse/ClickHouse/issues/22812): If PODArray was instantiated with element size that is neither a fraction or a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#22965](https://github.com/ClickHouse/ClickHouse/issues/22965): Fix very rare bug when quorum insert with `quorum_parallel=1` is not really "quorum" because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov)). +* Backported in [#22719](https://github.com/ClickHouse/ClickHouse/issues/22719): Check if table function view is used as a column. This complements https://github.com/ClickHouse/ClickHouse/pull/20350. [#21465](https://github.com/ClickHouse/ClickHouse/pull/21465) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22756](https://github.com/ClickHouse/ClickHouse/issues/22756): Fix usage of function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)). +* Backported in [#22889](https://github.com/ClickHouse/ClickHouse/issues/22889): Fix approx total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22915](https://github.com/ClickHouse/ClickHouse/issues/22915): LIVE VIEW (experimental feature). Fix possible hanging in concurrent DROP/CREATE of TEMPORARY LIVE VIEW in `TemporaryLiveViewCleaner`, see https://gist.github.com/vzakaznikov/0c03195960fc86b56bfe2bc73a90019e. [#22858](https://github.com/ClickHouse/ClickHouse/pull/22858) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#22918](https://github.com/ClickHouse/ClickHouse/issues/22918): Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#22814](https://github.com/ClickHouse/ClickHouse/issues/22814): Allow to start up with modified binary under gdb. In previous version if you set up breakpoint in gdb before start, server will refuse to start up due to failed integrity check. [#21258](https://github.com/ClickHouse/ClickHouse/pull/21258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.3.1.6185-prestable.md b/docs/changelogs/v21.3.1.6185-prestable.md new file mode 100644 index 00000000000..dabfe3cfeb3 --- /dev/null +++ b/docs/changelogs/v21.3.1.6185-prestable.md @@ -0,0 +1,159 @@ +### ClickHouse release v21.3.1.6185-prestable FIXME as compared to v21.2.1.5869-prestable + +#### Backward Incompatible Change +* Now all case-insensitive function names will be lower-cased during query analysis. This is needed for projection query routing. [#20174](https://github.com/ClickHouse/ClickHouse/pull/20174) ([Amos Bird](https://github.com/amosbird)). +* Now it's not allowed to create MergeTree tables in old syntax with table TTL because it's just ignored. Attach of old tables is still possible. [#20282](https://github.com/ClickHouse/ClickHouse/pull/20282) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Add experimental `Replicated` database engine. It replicates DDL queries across multiple hosts. [#16193](https://github.com/ClickHouse/ClickHouse/pull/16193) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Distributed query deduplication is a followup to [#16033](https://github.com/ClickHouse/ClickHouse/issues/16033) and partially resolves the proposal [#13574](https://github.com/ClickHouse/ClickHouse/issues/13574). [#17348](https://github.com/ClickHouse/ClickHouse/pull/17348) ([xjewer](https://github.com/xjewer)). +* Add the ability to backup/restore metadata files for DiskS3. [#18377](https://github.com/ClickHouse/ClickHouse/pull/18377) ([Pavel Kovalenko](https://github.com/Jokser)). +* - Included in the pull request. [#18508](https://github.com/ClickHouse/ClickHouse/pull/18508) ([PHO](https://github.com/depressed-pho)). +* Added file() function to read file as a String. This close [#issue:18851](https://github.com/ClickHouse/ClickHouse/issues/18851). [#19204](https://github.com/ClickHouse/ClickHouse/pull/19204) ([keenwolf](https://github.com/keen-wolf)). +* Tables with `MergeTree*` engine now have two new table-level settings for query concurrency control. Setting `max_concurrent_queries` limits the number of concurrently executed queries which are related to this table. Setting `min_marks_to_honor_max_concurrent_queries` tells to apply previous setting only if query reads at least this number of marks. [#19544](https://github.com/ClickHouse/ClickHouse/pull/19544) ([Amos Bird](https://github.com/amosbird)). +* - Mentioned in [#18454](https://github.com/ClickHouse/ClickHouse/issues/18454) - add function `htmlOrxmlCoarseParse`; - support `` parse; - support `` parse; - support `` parse; - support white space collapse; - support any `` format parse; - HyperScan to support SIMD; - Everything is done in a single pass. [#19600](https://github.com/ClickHouse/ClickHouse/pull/19600) ([zlx19950903](https://github.com/zlx19950903)). +* Add quota type QUERY_SELECTS and QUERY_INSERTS. [#19603](https://github.com/ClickHouse/ClickHouse/pull/19603) ([JackyWoo](https://github.com/JackyWoo)). +* ExecutableDictionarySource added implicit_key option. 
Fixes [#14527](https://github.com/ClickHouse/ClickHouse/issues/14527). [#19677](https://github.com/ClickHouse/ClickHouse/pull/19677) ([Maksim Kita](https://github.com/kitaisreal)). +* Added Server Side Encryption Customer Keys (the `x-amz-server-side-encryption-customer-(key/md5)` header) support in S3 client. See [the link](https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html). Closes [#19428](https://github.com/ClickHouse/ClickHouse/issues/19428). [#19748](https://github.com/ClickHouse/ClickHouse/pull/19748) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Function `reinterpretAs` updated to support big integers. Fixes [#19691](https://github.com/ClickHouse/ClickHouse/issues/19691). [#19858](https://github.com/ClickHouse/ClickHouse/pull/19858) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting `insert_shard_id` to support insert data into specific shard from distributed table. [#19961](https://github.com/ClickHouse/ClickHouse/pull/19961) ([flynn](https://github.com/ucasfl)). +* Added timezoneOffset(datetime) function which will give the offset from UTC in seconds. This close [#issue:19850](https://github.com/ClickHouse/ClickHouse/issues/19850). [#19962](https://github.com/ClickHouse/ClickHouse/pull/19962) ([keenwolf](https://github.com/keen-wolf)). +* New `event_time_microseconds column` in `system.part_log` table. [#20027](https://github.com/ClickHouse/ClickHouse/pull/20027) ([Bharat Nallan](https://github.com/bharatnc)). +* ... Add aggregate function `deltaSum` for summing the differences between consecutive rows. [#20057](https://github.com/ClickHouse/ClickHouse/pull/20057) ([Russ Frank](https://github.com/rf)). +* Add two settings to delay or throw error during insertion when there are too many inactive parts. This is useful when server fails to clean up parts quickly enough. [#20178](https://github.com/ClickHouse/ClickHouse/pull/20178) ([Amos Bird](https://github.com/amosbird)). 
+* Add file engine settings: `engine_file_empty_if_not_exists` and `engine_file_truncate_on_insert`. [#20620](https://github.com/ClickHouse/ClickHouse/pull/20620) ([M0r64n](https://github.com/M0r64n)). + +#### Performance Improvement +* Add parallel select final for one part with level>0 when `do_not_merge_across_partitions_select_final` setting is 1. [#19375](https://github.com/ClickHouse/ClickHouse/pull/19375) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved performance of bitmap columns during joins. [#19407](https://github.com/ClickHouse/ClickHouse/pull/19407) ([templarzq](https://github.com/templarzq)). +* Partially reimplement HTTP server to make it making less copies of incoming and outgoing data. It gives up to 1.5 performance improvement on inserting long records over HTTP. [#19516](https://github.com/ClickHouse/ClickHouse/pull/19516) ([Ivan](https://github.com/abyss7)). +* Improve performance of aggregate functions by more strict aliasing. [#19946](https://github.com/ClickHouse/ClickHouse/pull/19946) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the case when DataType parser may have exponential complexity (found by fuzzer). This closes [#20096](https://github.com/ClickHouse/ClickHouse/issues/20096). [#20132](https://github.com/ClickHouse/ClickHouse/pull/20132) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `compress` setting for `Memory` tables. If it's enabled the table will use less RAM. On some machines and datasets it can also work faster on SELECT, but it is not always the case. This closes [#20093](https://github.com/ClickHouse/ClickHouse/issues/20093). Note: there are reasons why Memory tables can work slower than MergeTree: (1) lack of compression (2) static size of blocks (3) lack of indices and prewhere... [#20168](https://github.com/ClickHouse/ClickHouse/pull/20168) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Do not squash blocks too much on INSERT SELECT if inserting into Memory table. In previous versions inefficient data representation was created in Memory table after INSERT SELECT. This closes [#13052](https://github.com/ClickHouse/ClickHouse/issues/13052). [#20169](https://github.com/ClickHouse/ClickHouse/pull/20169) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improved performance of aggregation by several fixed size fields (unconfirmed). [#20454](https://github.com/ClickHouse/ClickHouse/pull/20454) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up reading from `Memory` tables in extreme cases (when reading speed is in order of 50 GB/sec) by simplification of pipeline and (consequently) less lock contention in pipeline scheduling. [#20468](https://github.com/ClickHouse/ClickHouse/pull/20468) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of GROUP BY multiple fixed size keys. [#20472](https://github.com/ClickHouse/ClickHouse/pull/20472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The setting `distributed_aggregation_memory_efficient` is enabled by default. It will lower memory usage and improve performance of distributed queries. [#20599](https://github.com/ClickHouse/ClickHouse/pull/20599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better code in aggregation. [#20978](https://github.com/ClickHouse/ClickHouse/pull/20978) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add back intDiv/modulo vectorConstant specializations for better performance. This fixes [#21293](https://github.com/ClickHouse/ClickHouse/issues/21293) . The regression was introduced in https://github.com/ClickHouse/ClickHouse/pull/18145 . [#21307](https://github.com/ClickHouse/ClickHouse/pull/21307) ([Amos Bird](https://github.com/amosbird)).
+ +#### Improvement +* Fix creation of `TTL` in cases, when its expression is a function and it is the same as `ORDER BY` key. Now it's allowed to set custom aggregation to primary key columns in `TTL` with `GROUP BY`. Backward incompatible: For primary key columns, which are not in `GROUP BY` and aren't set explicitly now is applied function `any` instead of `max`, when TTL is expired. Also if you use TTL with `WHERE` or `GROUP BY` you can see exceptions at merges, while making rolling update. [#15450](https://github.com/ClickHouse/ClickHouse/pull/15450) ([Anton Popov](https://github.com/CurtizJ)). +* Hedged Requests for remote queries. When setting `use_hedged_requests` enabled (by default), allow to establish many connections with different replicas for query. New connection is enabled in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout` or no data was received within `receive_data_timeout`. Query uses the first connection which send non empty progress packet (or data packet, if `allow_changing_replica_until_first_data_packet`); other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported. [#19291](https://github.com/ClickHouse/ClickHouse/pull/19291) ([Kruglov Pavel](https://github.com/Avogar)). +* Print inline frames for fatal stacktraces. [#19317](https://github.com/ClickHouse/ClickHouse/pull/19317) ([Ivan](https://github.com/abyss7)). +* Do not silently ignore write errors. [#19451](https://github.com/ClickHouse/ClickHouse/pull/19451) ([Azat Khuzhin](https://github.com/azat)). +* Added support for `PREWHERE` when tables have row-level security expressions specified. [#19576](https://github.com/ClickHouse/ClickHouse/pull/19576) ([Denis Glazachev](https://github.com/traceon)). +* Add IStoragePolicy interface. [#19608](https://github.com/ClickHouse/ClickHouse/pull/19608) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). 
+* Add ability to throttle INSERT into Distributed based on amount of pending bytes for async send (`bytes_to_delay_insert`/`max_delay_to_insert` and `bytes_to_throw_insert` settings for `Distributed` engine has been added). [#19673](https://github.com/ClickHouse/ClickHouse/pull/19673) ([Azat Khuzhin](https://github.com/azat)). +* move Conditions that are not related to JOIN to where clause. [#18720](https://github.com/ClickHouse/ClickHouse/issues/18720). [#19685](https://github.com/ClickHouse/ClickHouse/pull/19685) ([hexiaoting](https://github.com/hexiaoting)). +* Add separate config directive for Buffer profile. [#19721](https://github.com/ClickHouse/ClickHouse/pull/19721) ([Azat Khuzhin](https://github.com/azat)). +* Show MaterializeMySQL tables in `system.parts`. [#19770](https://github.com/ClickHouse/ClickHouse/pull/19770) ([Stig Bakken](https://github.com/stigsb)). +* Initialize MaxDDLEntryID to the last value after restarting. Before this PR, MaxDDLEntryID will remain zero until a new DDLTask is processed. [#19924](https://github.com/ClickHouse/ClickHouse/pull/19924) ([Amos Bird](https://github.com/amosbird)). +* Add conversion of block structure for INSERT into Distributed tables if it does not match. [#19947](https://github.com/ClickHouse/ClickHouse/pull/19947) ([Azat Khuzhin](https://github.com/azat)). +* If user calls `JSONExtract` function with `Float32` type requested, allow inaccurate conversion to the result type. For example the number `0.1` in JSON is double precision and is not representable in Float32, but the user still wants to get it. Previous versions return 0 for non-Nullable type and NULL for Nullable type to indicate that conversion is imprecise. The logic was 100% correct but it was surprising to users and leading to questions. This closes [#13962](https://github.com/ClickHouse/ClickHouse/issues/13962). [#19960](https://github.com/ClickHouse/ClickHouse/pull/19960) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* The value of MYSQL_OPT_RECONNECT option can now be controlled by "opt_reconnect" parameter in the config section of mysql replica. [#19998](https://github.com/ClickHouse/ClickHouse/pull/19998) ([Alexander Kazakov](https://github.com/Akazz)). +* Return `DiskType` instead of `String` in IDisk::getType() as in the rest of storage interfaces. [#19999](https://github.com/ClickHouse/ClickHouse/pull/19999) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Fix data race in executable dictionary that was possible only on misuse (when the script returns data ignoring its input). [#20045](https://github.com/ClickHouse/ClickHouse/pull/20045) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Show full details of MaterializeMySQL tables in `system.tables`. [#20051](https://github.com/ClickHouse/ClickHouse/pull/20051) ([Stig Bakken](https://github.com/stigsb)). +* Supports system.zookeeper path IN query. [#20105](https://github.com/ClickHouse/ClickHouse/pull/20105) ([小路](https://github.com/nicelulu)). +* 1. SHOW TABLES is now considered as one query in the quota calculations, not two queries. 2. SYSTEM queries now consume quota. 3. Fix calculation of interval's end in quota consumption. [#20106](https://github.com/ClickHouse/ClickHouse/pull/20106) ([Vitaly Baranov](https://github.com/vitlibar)). +* - Fix toDateTime64(toDate()/toDateTime()) for DateTime64 - Implement DateTime64 clamping to match DateTime behaviour. [#20131](https://github.com/ClickHouse/ClickHouse/pull/20131) ([Azat Khuzhin](https://github.com/azat)). +* The setting `access_management` is now configurable on startup by providing `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT`, defaults to disabled (`0`) which was the prior value. [#20139](https://github.com/ClickHouse/ClickHouse/pull/20139) ([Marquitos](https://github.com/sonirico)). +* Updated `CacheDictionary`, `ComplexCacheDictionary`, `SSDCacheDictionary`, `SSDComplexKeyDictionary` to use LRUHashMap as underlying index. 
[#20164](https://github.com/ClickHouse/ClickHouse/pull/20164) ([Maksim Kita](https://github.com/kitaisreal)). +* Support all native integer types in bitmap functions. [#20171](https://github.com/ClickHouse/ClickHouse/pull/20171) ([Amos Bird](https://github.com/amosbird)). +* Normalize count(constant), sum(1) to count(). This is needed for projection query routing. [#20175](https://github.com/ClickHouse/ClickHouse/pull/20175) ([Amos Bird](https://github.com/amosbird)). +* Perform algebraic optimizations of arithmetic expressions inside `avg` aggregate function. close [#20092](https://github.com/ClickHouse/ClickHouse/issues/20092). [#20183](https://github.com/ClickHouse/ClickHouse/pull/20183) ([flynn](https://github.com/ucasfl)). +* Lockless `SYSTEM FLUSH DISTRIBUTED`. [#20215](https://github.com/ClickHouse/ClickHouse/pull/20215) ([Azat Khuzhin](https://github.com/azat)). +* Implicit conversion from integer to Decimal type might succeed if integer value does not fit into Decimal type. Now it throws `ARGUMENT_OUT_OF_BOUND`. [#20232](https://github.com/ClickHouse/ClickHouse/pull/20232) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not allow early constant folding of explicitly forbidden functions. [#20303](https://github.com/ClickHouse/ClickHouse/pull/20303) ([Azat Khuzhin](https://github.com/azat)). +* Make FQDN and other DNS related functions work correctly in alpine images. [#20336](https://github.com/ClickHouse/ClickHouse/pull/20336) ([filimonov](https://github.com/filimonov)). +* Fixed race between execution of distributed DDL tasks and cleanup of DDL queue. Now DDL task cannot be removed from ZooKeeper if there are active workers. Fixes [#20016](https://github.com/ClickHouse/ClickHouse/issues/20016). [#20448](https://github.com/ClickHouse/ClickHouse/pull/20448) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improved serialization for data types combined of Arrays and Tuples. Improved matching enum data types to protobuf enum type.
Fixed serialization of the `Map` data type. Omitted values are now set by default. [#20506](https://github.com/ClickHouse/ClickHouse/pull/20506) ([Vitaly Baranov](https://github.com/vitlibar)). +* https://github.com/ClickHouse/ClickHouse/issues/20576. [#20596](https://github.com/ClickHouse/ClickHouse/pull/20596) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Function 'reinterpretAs(x, Type)' renamed into 'reinterpret(x, Type)'. [#20611](https://github.com/ClickHouse/ClickHouse/pull/20611) ([Maksim Kita](https://github.com/kitaisreal)). +* When loading config for mysql source ClickHouse will now randomize the list of replicas with the same priority to ensure the round-robin logics of picking mysql endpoint. This closes [#20629](https://github.com/ClickHouse/ClickHouse/issues/20629). [#20632](https://github.com/ClickHouse/ClickHouse/pull/20632) ([Alexander Kazakov](https://github.com/Akazz)). +* Do only merging of sorted blocks on initiator with distributed_group_by_no_merge. [#20882](https://github.com/ClickHouse/ClickHouse/pull/20882) ([Azat Khuzhin](https://github.com/azat)). +* - Fill only requested columns when querying system.parts & system.parts_columns. Closes [#19570](https://github.com/ClickHouse/ClickHouse/issues/19570). ... [#21035](https://github.com/ClickHouse/ClickHouse/pull/21035) ([Anmol Arora](https://github.com/anmolarora)). +* Usability improvement: more consistent `DateTime64` parsing: recognize the case when unix timestamp with subsecond resolution is specified as scaled integer (like `1111111111222` instead of `1111111111.222`). This closes [#13194](https://github.com/ClickHouse/ClickHouse/issues/13194). [#21053](https://github.com/ClickHouse/ClickHouse/pull/21053) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* MySQL dictionary source will now retry unexpected connection failures (Lost connection to MySQL server during query) which sometimes happen on SSL/TLS connections. 
[#21237](https://github.com/ClickHouse/ClickHouse/pull/21237) ([Alexander Kazakov](https://github.com/Akazz)). +* Forbid to drop a column if it's referenced by materialized view. Closes [#21164](https://github.com/ClickHouse/ClickHouse/issues/21164). [#21303](https://github.com/ClickHouse/ClickHouse/pull/21303) ([flynn](https://github.com/ucasfl)). +* Provide better compatibility for mysql clients. 1. mysql jdbc 2. mycli. [#21367](https://github.com/ClickHouse/ClickHouse/pull/21367) ([Amos Bird](https://github.com/amosbird)). +* Case-insensitive compression methods for table functions. Also fixed `LZMA` compression method which was checked in upper case. [#21416](https://github.com/ClickHouse/ClickHouse/pull/21416) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Bug Fix +* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Fix a bug that moving pieces to destination table may fail in case of launching multiple clickhouse-copiers. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)). +* Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([李扬](https://github.com/taiyang-li)). +* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* BloomFilter index crash fix.
Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* - Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). +* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). +* MaterializeMySQL: Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). +* Fix the case when calculating modulo of division of negative number by small divisor, the resulting data type was not large enough to accommodate the negative result. This closes [#20052](https://github.com/ClickHouse/ClickHouse/issues/20052). [#20067](https://github.com/ClickHouse/ClickHouse/pull/20067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* * Bugfix in StorageJoin. [#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([Vladimir C](https://github.com/vdimir)). +* The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix server crash after query with `if` function with `Tuple` type of then/else branches result.
`Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). +* fix toMinute function to handle special timezone correctly. [#20149](https://github.com/ClickHouse/ClickHouse/pull/20149) ([keenwolf](https://github.com/keen-wolf)). +* Fixes [#19314](https://github.com/ClickHouse/ClickHouse/issues/19314). [#20156](https://github.com/ClickHouse/ClickHouse/pull/20156) ([Ivan](https://github.com/abyss7)). +* Fix CTE when using in INSERT SELECT. This fixes [#20187](https://github.com/ClickHouse/ClickHouse/issues/20187), fixes [#20195](https://github.com/ClickHouse/ClickHouse/issues/20195). [#20211](https://github.com/ClickHouse/ClickHouse/pull/20211) ([Amos Bird](https://github.com/amosbird)). +* Fix rare server crash on config reload during the shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)). +* Fix exception during vertical merge for `MergeTree` table engines family which don't allow to perform vertical merges. Fixes [#20259](https://github.com/ClickHouse/ClickHouse/issues/20259). [#20279](https://github.com/ClickHouse/ClickHouse/pull/20279) ([alesapin](https://github.com/alesapin)). +* Fixed the behavior when in case of broken JSON we tried to read the whole file into memory which leads to exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Restrict to `DROP` or `RENAME` version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). 
+* Fix too often retries of failed background tasks for `ReplicatedMergeTree` table engines family. This could lead to too verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)). +* Fix incorrect result of binary operations between two constant decimals of different scale. Fixes [#20283](https://github.com/ClickHouse/ClickHouse/issues/20283). [#20339](https://github.com/ClickHouse/ClickHouse/pull/20339) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). +* Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). +* Check if table function `view` is used in expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)). +* Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. [#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)). +* Fix abnormal server termination when http client goes away. [#20464](https://github.com/ClickHouse/ClickHouse/pull/20464) ([Azat Khuzhin](https://github.com/azat)). +* Fix infinite loop when propagating WITH aliases to subqueries. This fixes [#20388](https://github.com/ClickHouse/ClickHouse/issues/20388). [#20476](https://github.com/ClickHouse/ClickHouse/pull/20476) ([Amos Bird](https://github.com/amosbird)). +* Fix function `transform` does not work properly for floating point keys. Closes [#20460](https://github.com/ClickHouse/ClickHouse/issues/20460). 
[#20479](https://github.com/ClickHouse/ClickHouse/pull/20479) ([flynn](https://github.com/ucasfl)). +* Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV). [#20498](https://github.com/ClickHouse/ClickHouse/pull/20498) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash which could happen if unknown packet was received from remote query (was introduced in [#17868](https://github.com/ClickHouse/ClickHouse/issues/17868)). [#20547](https://github.com/ClickHouse/ClickHouse/pull/20547) ([Azat Khuzhin](https://github.com/azat)). +* Fix the number of threads for scalar subqueries and subqueries for index (after [#19007](https://github.com/ClickHouse/ClickHouse/issues/19007) single thread was always used). Fixes [#20457](https://github.com/ClickHouse/ClickHouse/issues/20457), [#20512](https://github.com/ClickHouse/ClickHouse/issues/20512). [#20550](https://github.com/ClickHouse/ClickHouse/pull/20550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed inconsistent behavior of dictionary in case of queries where we look for absent keys in dictionary. [#20578](https://github.com/ClickHouse/ClickHouse/pull/20578) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix subquery with union distinct and limit clause. close [#20597](https://github.com/ClickHouse/ClickHouse/issues/20597). [#20610](https://github.com/ClickHouse/ClickHouse/pull/20610) ([flynn](https://github.com/ucasfl)). +* Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). +* `USE database;` query did not work when using MySQL 5.7 client to connect to ClickHouse server, it's fixed. Fixes [#18926](https://github.com/ClickHouse/ClickHouse/issues/18926). [#20878](https://github.com/ClickHouse/ClickHouse/pull/20878) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix 'Empty task was returned from async task queue' on query cancellation. [#20881](https://github.com/ClickHouse/ClickHouse/pull/20881) ([Azat Khuzhin](https://github.com/azat)). +* Closes [#9969](https://github.com/ClickHouse/ClickHouse/issues/9969). Fixed Brotli http compression error, which reproduced for large data sizes, slightly complicated structure and with json output format. Update Brotli to the latest version to include the "fix rare access to uninitialized data in ring-buffer". [#20991](https://github.com/ClickHouse/ClickHouse/pull/20991) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed behaviour, when `ALTER MODIFY COLUMN` created mutation, that will knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). +* - Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([Vladimir C](https://github.com/vdimir)). +* Out of bound memory access was possible when formatting specifically crafted out of range value of type `DateTime64`. This closes [#20494](https://github.com/ClickHouse/ClickHouse/issues/20494). This closes [#20543](https://github.com/ClickHouse/ClickHouse/issues/20543). [#21023](https://github.com/ClickHouse/ClickHouse/pull/21023) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix default_replica_path and default_replica_name values are useless on Replicated(*)MergeTree engine when the engine needs specify other parameters. [#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)). +* Fix type mismatch issue when using LowCardinality keys in joinGet. This fixes [#21114](https://github.com/ClickHouse/ClickHouse/issues/21114). [#21117](https://github.com/ClickHouse/ClickHouse/pull/21117) ([Amos Bird](https://github.com/amosbird)). +* Fix the metadata leak when the Replicated*MergeTree with custom (non default) ZooKeeper cluster is dropped. 
[#21119](https://github.com/ClickHouse/ClickHouse/pull/21119) ([fastio](https://github.com/fastio)). +* fix bug related to cast tuple to map. Closes [#21029](https://github.com/ClickHouse/ClickHouse/issues/21029). [#21120](https://github.com/ClickHouse/ClickHouse/pull/21120) ([hexiaoting](https://github.com/hexiaoting)). +* Fix `input_format_null_as_default` take effective when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116) . [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)). +* Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed bug that could cause duplicates with insert query (if one of the callbacks came a little too late). [#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Now mutations allowed only for table engines that support them (MergeTree family, Memory, MaterializedView). Other engines will report a more clear error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)). +* Fix crash in `EXPLAIN` for query with `UNION`. Fixes [#20876](https://github.com/ClickHouse/ClickHouse/issues/20876), [#21170](https://github.com/ClickHouse/ClickHouse/issues/21170). [#21246](https://github.com/ClickHouse/ClickHouse/pull/21246) ([flynn](https://github.com/ucasfl)). +* Fix bug with `join_use_nulls` and joining `TOTALS` from subqueries. This closes [#19362](https://github.com/ClickHouse/ClickHouse/issues/19362) and [#21137](https://github.com/ClickHouse/ClickHouse/issues/21137). [#21248](https://github.com/ClickHouse/ClickHouse/pull/21248) ([Vladimir C](https://github.com/vdimir)). +* Fix redundant reconnects to ZooKeeper and the possibility of two active sessions for a single clickhouse server. 
Both problems introduced in [#14678](https://github.com/ClickHouse/ClickHouse/issues/14678). [#21264](https://github.com/ClickHouse/ClickHouse/pull/21264) ([alesapin](https://github.com/alesapin)). +* Now `ALTER MODIFY COLUMN` queries will correctly affect changes in partition key, skip indices, TTLs, and so on. Fixes [#13675](https://github.com/ClickHouse/ClickHouse/issues/13675). [#21334](https://github.com/ClickHouse/ClickHouse/pull/21334) ([alesapin](https://github.com/alesapin)). +* Fix error `Bad cast from type ... to DB::ColumnLowCardinality` while inserting into table with `LowCardinality` column from `Values` format. Fixes [#21140](https://github.com/ClickHouse/ClickHouse/issues/21140). [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV for distributed queries on failures. [#21434](https://github.com/ClickHouse/ClickHouse/pull/21434) ([Azat Khuzhin](https://github.com/azat)). +* Fix a deadlock in `ALTER DELETE` mutations for non replicated MergeTree table engines when the predicate contains the table itself. Fixes [#20558](https://github.com/ClickHouse/ClickHouse/issues/20558). [#21477](https://github.com/ClickHouse/ClickHouse/pull/21477) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement +* Fixed port clash from test_storage_kerberized_hdfs test. [#19974](https://github.com/ClickHouse/ClickHouse/pull/19974) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix some of the issues found by Coverity. See [#19964](https://github.com/ClickHouse/ClickHouse/issues/19964). [#20010](https://github.com/ClickHouse/ClickHouse/pull/20010) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to build ClickHouse with AVX-2 enabled globally. It gives slight performance benefits on modern CPUs. Not recommended for production and will not be supported as official build for now. 
[#20180](https://github.com/ClickHouse/ClickHouse/pull/20180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add some checks in order to fix the bug https://clickhouse-test-reports.s3.yandex.net/20472/5bdc57004682a5e0236ec630546d20ad752c2fde/stress_test_(thread)/stderr.log. [#20516](https://github.com/ClickHouse/ClickHouse/pull/20516) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Print `stdout` and `stderr` to log when failed to start docker in integration tests. Before this PR there was a very short error message in this case which didn't help to investigate the problems. [#20631](https://github.com/ClickHouse/ClickHouse/pull/20631) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove most of `sleep` commands from kafka integration tests, waiting for log events instead. Add test for different compression methods. [#21111](https://github.com/ClickHouse/ClickHouse/pull/21111) ([filimonov](https://github.com/filimonov)). +* Allow to start up with modified binary under gdb. In previous version if you set up breakpoint in gdb before start, server will refuse to start up due to failed integrity check. [#21258](https://github.com/ClickHouse/ClickHouse/pull/21258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Experimental feature + +* Introduce experimental support for window functions, enabled with `allow_experimental_window_functions = 1`. This is a preliminary, alpha-quality implementation that is not suitable for production use and will change in backward-incompatible ways in future releases. Please see [the documentation](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/sql-reference/window-functions/index.md#experimental-window-functions) for the list of supported features. [#20337](https://github.com/ClickHouse/ClickHouse/pull/20337) ([Alexander Kuzmenkov](https://github.com/akuzm)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Fix access control manager destruction order"'. 
[#20394](https://github.com/ClickHouse/ClickHouse/pull/20394) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Update argmax.md '. [#20625](https://github.com/ClickHouse/ClickHouse/pull/20625) ([Marvin Taschenberger](https://github.com/Taschenbergerm)). + diff --git a/docs/changelogs/v21.3.10.1-lts.md b/docs/changelogs/v21.3.10.1-lts.md new file mode 100644 index 00000000000..49ece009ad1 --- /dev/null +++ b/docs/changelogs/v21.3.10.1-lts.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.3.10.1-lts FIXME as compared to v21.3.9.83-lts + +#### Bug Fix +* Backported in [#23973](https://github.com/ClickHouse/ClickHouse/issues/23973): Fixed a bug in recovery of stale `ReplicatedMergeTree` replica. Some metadata updates could be ignored by stale replica if `ALTER` query was executed during downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23773](https://github.com/ClickHouse/ClickHouse/issues/23773): Avoid possible "Cannot schedule a task" error (in case some exception had occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#23817](https://github.com/ClickHouse/ClickHouse/issues/23817): Fix crash when `PREWHERE` and row policy filter are both in effect with empty result. [#23763](https://github.com/ClickHouse/ClickHouse/pull/23763) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23814](https://github.com/ClickHouse/ClickHouse/issues/23814): Fix `CLEAR COLUMN` does not work when it is referenced by materialized view. Close [#23764](https://github.com/ClickHouse/ClickHouse/issues/23764). [#23781](https://github.com/ClickHouse/ClickHouse/pull/23781) ([flynn](https://github.com/ucasfl)). 
+ diff --git a/docs/changelogs/v21.3.11.5-lts.md b/docs/changelogs/v21.3.11.5-lts.md new file mode 100644 index 00000000000..61aa8a54688 --- /dev/null +++ b/docs/changelogs/v21.3.11.5-lts.md @@ -0,0 +1,9 @@ +### ClickHouse release v21.3.11.5-lts FIXME as compared to v21.3.10.1-lts + +#### Improvement +* Backported in [#24085](https://github.com/ClickHouse/ClickHouse/issues/24085): Support specifying table schema for postgresql dictionary source. Closes [#23958](https://github.com/ClickHouse/ClickHouse/issues/23958). [#23980](https://github.com/ClickHouse/ClickHouse/pull/23980) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#24002](https://github.com/ClickHouse/ClickHouse/issues/24002): Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). [#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#24032](https://github.com/ClickHouse/ClickHouse/issues/24032): Fix crash in MergeJoin, close [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). [#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.3.12.2-lts.md b/docs/changelogs/v21.3.12.2-lts.md new file mode 100644 index 00000000000..cfddf8c9cdd --- /dev/null +++ b/docs/changelogs/v21.3.12.2-lts.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.3.12.2-lts FIXME as compared to v21.3.11.5-lts + +#### Bug Fix +* Backported in [#24189](https://github.com/ClickHouse/ClickHouse/issues/24189): Some `ALTER PARTITION` queries might cause `Part A intersects previous part B` and `Unexpected merged part C intersecting drop range D` errors in replication queue. It's fixed. Fixes [#23296](https://github.com/ClickHouse/ClickHouse/issues/23296). 
[#23997](https://github.com/ClickHouse/ClickHouse/pull/23997) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24306](https://github.com/ClickHouse/ClickHouse/issues/24306): Fixed using const `DateTime` value vs `DateTime64` column in WHERE. ... [#24100](https://github.com/ClickHouse/ClickHouse/pull/24100) ([Vasily Nemkov](https://github.com/Enmk)). +* Backported in [#24140](https://github.com/ClickHouse/ClickHouse/issues/24140): Bug: explain pipeline with` select xxx final `shows wrong pipeline: ``` dell123 :) explain pipeline select z from prewhere_move_select_final final;. [#24116](https://github.com/ClickHouse/ClickHouse/pull/24116) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#24190](https://github.com/ClickHouse/ClickHouse/issues/24190): Fix a rare bug that could lead to a partially initialized table that can serve write requests (insert/alter/so on). Now such tables will be in readonly mode. [#24122](https://github.com/ClickHouse/ClickHouse/pull/24122) ([alesapin](https://github.com/alesapin)). +* Backported in [#24235](https://github.com/ClickHouse/ClickHouse/issues/24235): Fix race condition which could happen in RBAC under a heavy load. This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090), [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134),. [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#24244](https://github.com/ClickHouse/ClickHouse/issues/24244): Fix abnormal server termination due to hdfs becoming not accessible during query execution. Closes [#24117](https://github.com/ClickHouse/ClickHouse/issues/24117). [#24191](https://github.com/ClickHouse/ClickHouse/pull/24191) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Backported in [#24241](https://github.com/ClickHouse/ClickHouse/issues/24241): Fix wrong typo at StorageMemory, this bug was introduced at [#15127](https://github.com/ClickHouse/ClickHouse/issues/15127), now fixed, Closes [#24192](https://github.com/ClickHouse/ClickHouse/issues/24192). [#24193](https://github.com/ClickHouse/ClickHouse/pull/24193) ([张中南](https://github.com/plugine)). +* Backported in [#24353](https://github.com/ClickHouse/ClickHouse/issues/24353): Fixed a bug in moving Materialized View from Ordinary to Atomic database (`RENAME TABLE` query). Now inner table is moved to new database together with Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v21.3.13.9-lts.md b/docs/changelogs/v21.3.13.9-lts.md new file mode 100644 index 00000000000..dccb9c53162 --- /dev/null +++ b/docs/changelogs/v21.3.13.9-lts.md @@ -0,0 +1,43 @@ +### ClickHouse release v21.3.13.9-lts FIXME as compared to v21.3.12.2-lts + +#### Improvement +* Backported in [#24794](https://github.com/ClickHouse/ClickHouse/issues/24794): Avoid hiding errors like `Limit for rows or bytes to read exceeded` for scalar subqueries. [#24545](https://github.com/ClickHouse/ClickHouse/pull/24545) ([nvartolomei](https://github.com/nvartolomei)). + +#### Bug Fix +* Backported in [#25567](https://github.com/ClickHouse/ClickHouse/issues/25567): Kafka storage may support parquet format messages. [#23412](https://github.com/ClickHouse/ClickHouse/pull/23412) ([Chao Ma](https://github.com/godliness)). +* Backported in [#24554](https://github.com/ClickHouse/ClickHouse/issues/24554): Fixed server fault when inserting data through HTTP caused an exception. This fixes [#23512](https://github.com/ClickHouse/ClickHouse/issues/23512). 
[#23643](https://github.com/ClickHouse/ClickHouse/pull/23643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#23833](https://github.com/ClickHouse/ClickHouse/issues/23833): Fix error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` and `use_hedged_requests`. Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#24703](https://github.com/ClickHouse/ClickHouse/issues/24703): Allow empty HTTP headers. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)). +* Backported in [#24702](https://github.com/ClickHouse/ClickHouse/issues/24702): Fix drop partition with intersect fake parts. In rare cases there might be parts with mutation version greater than current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24725](https://github.com/ClickHouse/ClickHouse/issues/24725): In "multipart/form-data" message consider the CRLF preceding a boundary as part of it. Fixes [#23905](https://github.com/ClickHouse/ClickHouse/issues/23905). [#24399](https://github.com/ClickHouse/ClickHouse/pull/24399) ([Ivan](https://github.com/abyss7)). +* Backported in [#24826](https://github.com/ClickHouse/ClickHouse/issues/24826): - Fixed the deadlock that can happen during LDAP role (re)mapping, when LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)). +* Backported in [#25566](https://github.com/ClickHouse/ClickHouse/issues/25566): Fix incorrect monotonicity of toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422) . 
This bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/5212 , and was exposed later by smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24700](https://github.com/ClickHouse/ClickHouse/issues/24700): Fixed the behavior when query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on server with extremely little amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#24698](https://github.com/ClickHouse/ClickHouse/issues/24698): Enable reading of subcolumns for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed remote JDBC bridge timeout connection issue. Closes [#9609](https://github.com/ClickHouse/ClickHouse/issues/9609). [#24588](https://github.com/ClickHouse/ClickHouse/pull/24588) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#24771](https://github.com/ClickHouse/ClickHouse/issues/24771): Fix bug which can lead to ZooKeeper client hung inside clickhouse-server. [#24721](https://github.com/ClickHouse/ClickHouse/pull/24721) ([alesapin](https://github.com/alesapin)). +* Backported in [#24934](https://github.com/ClickHouse/ClickHouse/issues/24934): - If ZooKeeper connection was lost and replica was cloned after restoring the connection, its replication queue might contain outdated entries. It's fixed. - Fixed crash when replication queue contains intersecting virtual parts. It may rarely happen if some data part was lost. Print error in log instead of terminating. [#24777](https://github.com/ClickHouse/ClickHouse/pull/24777) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#24850](https://github.com/ClickHouse/ClickHouse/issues/24850): Fix bug when exception `Mutation was killed` can be thrown to the client on mutation wait when mutation not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)). +* Backported in [#24917](https://github.com/ClickHouse/ClickHouse/issues/24917): Fix extremely rare bug on low-memory servers which can lead to the inability to perform merges without restart. Possibly fixes [#24603](https://github.com/ClickHouse/ClickHouse/issues/24603). [#24872](https://github.com/ClickHouse/ClickHouse/pull/24872) ([alesapin](https://github.com/alesapin)). +* Backported in [#25185](https://github.com/ClickHouse/ClickHouse/issues/25185): Fixed bug with declaring S3 disk at root of bucket. Earlier, it reported an error: ``` [heather] 2021.05.10 02:11:11.932234 [ 72790 ] {2ff80b7b-ec53-41cb-ac35-19bb390e1759} executeQuery: Code: 36, e.displayText() = DB::Exception: Key name is empty in path style S3 URI: (http://172.17.0.2/bucket/) (version 21.6.1.1) (from 127.0.0.1:47994) (in query: SELECT policy_name FROM system.storage_policies), Stack trace (when copying this message, always include the lines below):. [#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#24949](https://github.com/ClickHouse/ClickHouse/issues/24949): Fix possible heap-buffer-overflow in Arrow. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25562](https://github.com/ClickHouse/ClickHouse/issues/25562): Fix extremely rare error `Tagging already tagged part` in replication queue during concurrent `alter move/replace partition`. Possibly fixes [#22142](https://github.com/ClickHouse/ClickHouse/issues/22142). [#24961](https://github.com/ClickHouse/ClickHouse/pull/24961) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#25367](https://github.com/ClickHouse/ClickHouse/issues/25367): Fix serialization of split nested messages in Protobuf format. This PR fixes [#24647](https://github.com/ClickHouse/ClickHouse/issues/24647). [#25000](https://github.com/ClickHouse/ClickHouse/pull/25000) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#25561](https://github.com/ClickHouse/ClickHouse/issues/25561): Fix bug which allows creating tables with columns referencing themselves like `a UInt32 ALIAS a + 1` or `b UInt32 MATERIALIZED b`. Fixes [#24910](https://github.com/ClickHouse/ClickHouse/issues/24910), [#24292](https://github.com/ClickHouse/ClickHouse/issues/24292). [#25059](https://github.com/ClickHouse/ClickHouse/pull/25059) ([alesapin](https://github.com/alesapin)). +* Backported in [#25105](https://github.com/ClickHouse/ClickHouse/issues/25105): Fix bug with constant maps in mapContains that lead to error `empty column was returned by function mapContains`. Closes [#25077](https://github.com/ClickHouse/ClickHouse/issues/25077). [#25080](https://github.com/ClickHouse/ClickHouse/pull/25080) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25142](https://github.com/ClickHouse/ClickHouse/issues/25142): Fix crash in query with cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25351](https://github.com/ClickHouse/ClickHouse/issues/25351): Fix TOCTOU error in installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25473](https://github.com/ClickHouse/ClickHouse/issues/25473): Fix joinGetOrNull with not-nullable columns. This fixes [#24261](https://github.com/ClickHouse/ClickHouse/issues/24261). 
[#25288](https://github.com/ClickHouse/ClickHouse/pull/25288) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#25361](https://github.com/ClickHouse/ClickHouse/issues/25361): Fix error `Bad cast from type DB::ColumnLowCardinality to DB::ColumnVector` for queries where `LowCardinality` argument was used for IN (this bug appeared in 21.6). Fixes [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187). [#25290](https://github.com/ClickHouse/ClickHouse/pull/25290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25355](https://github.com/ClickHouse/ClickHouse/issues/25355): Fix Logical Error Cannot sum Array/Tuple in min/maxMap. [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25437](https://github.com/ClickHouse/ClickHouse/issues/25437): Support `SimpleAggregateFunction(LowCardinality)` for `SummingMergeTree`. Fixes [#25134](https://github.com/ClickHouse/ClickHouse/issues/25134). [#25300](https://github.com/ClickHouse/ClickHouse/pull/25300) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25364](https://github.com/ClickHouse/ClickHouse/issues/25364): On ZooKeeper connection loss `ReplicatedMergeTree` table might wait for background operations to complete before trying to reconnect. It's fixed, now background operations are stopped forcefully. [#25306](https://github.com/ClickHouse/ClickHouse/pull/25306) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25387](https://github.com/ClickHouse/ClickHouse/issues/25387): Fix the possibility of non-deterministic behaviour of the `quantileDeterministic` function and similar. This closes [#20480](https://github.com/ClickHouse/ClickHouse/issues/20480). [#25313](https://github.com/ClickHouse/ClickHouse/pull/25313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#25455](https://github.com/ClickHouse/ClickHouse/issues/25455): Fix lost `WHERE` condition in expression-push-down optimization of query plan (setting `query_plan_filter_push_down = 1` by default). Fixes [#25368](https://github.com/ClickHouse/ClickHouse/issues/25368). [#25370](https://github.com/ClickHouse/ClickHouse/pull/25370) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25406](https://github.com/ClickHouse/ClickHouse/issues/25406): Fix `REPLACE` column transformer when used in DDL by correctly quoting the formatted query. This fixes [#23925](https://github.com/ClickHouse/ClickHouse/issues/23925). [#25391](https://github.com/ClickHouse/ClickHouse/pull/25391) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#25505](https://github.com/ClickHouse/ClickHouse/issues/25505): Fix segfault when sharding_key is absent in task config for copier. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#24721](https://github.com/ClickHouse/ClickHouse/issues/24721) to 21.3: Remove endless `wait` from ZooKeeper client"'. [#24799](https://github.com/ClickHouse/ClickHouse/pull/24799) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v21.3.14.1-lts.md b/docs/changelogs/v21.3.14.1-lts.md new file mode 100644 index 00000000000..b2408471ccd --- /dev/null +++ b/docs/changelogs/v21.3.14.1-lts.md @@ -0,0 +1,10 @@ +### ClickHouse release v21.3.14.1-lts FIXME as compared to v21.3.13.9-lts + +#### Bug Fix +* Backported in [#25851](https://github.com/ClickHouse/ClickHouse/issues/25851): `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. Inference of the common type for `Date` and `DateTime` also was not using the corresponding timezone. 
It affected the results of function `if` and array construction. Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#25678](https://github.com/ClickHouse/ClickHouse/issues/25678): Fixed bug in deserialization of random generator state which might cause some data types such as `AggregateFunction(groupArraySample(N), T)` to behave in a non-deterministic way. [#24538](https://github.com/ClickHouse/ClickHouse/pull/24538) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25637](https://github.com/ClickHouse/ClickHouse/issues/25637): Fix wrong totals for query `WITH TOTALS` and `WITH FILL`. Fixes [#20872](https://github.com/ClickHouse/ClickHouse/issues/20872). [#25539](https://github.com/ClickHouse/ClickHouse/pull/25539) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#25651](https://github.com/ClickHouse/ClickHouse/issues/25651): Fix null pointer dereference in `EXPLAIN AST` without query. [#25631](https://github.com/ClickHouse/ClickHouse/pull/25631) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25716](https://github.com/ClickHouse/ClickHouse/issues/25716): `REPLACE PARTITION` might be ignored in rare cases if the source partition was empty. It's fixed. Fixes [#24869](https://github.com/ClickHouse/ClickHouse/issues/24869). [#25665](https://github.com/ClickHouse/ClickHouse/pull/25665) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25712](https://github.com/ClickHouse/ClickHouse/issues/25712): Fixed `No such file or directory` error on moving `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). [#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ diff --git a/docs/changelogs/v21.3.15.4-stable.md b/docs/changelogs/v21.3.15.4-stable.md new file mode 100644 index 00000000000..c6b7a15aa4d --- /dev/null +++ b/docs/changelogs/v21.3.15.4-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v21.3.15.4-stable FIXME as compared to v21.3.14.1-lts + +#### Bug Fix +* Backported in [#25956](https://github.com/ClickHouse/ClickHouse/issues/25956): Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)). +* Backported in [#26141](https://github.com/ClickHouse/ClickHouse/issues/26141): Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.3.16.5-lts.md b/docs/changelogs/v21.3.16.5-lts.md new file mode 100644 index 00000000000..b5b31b64488 --- /dev/null +++ b/docs/changelogs/v21.3.16.5-lts.md @@ -0,0 +1,25 @@ +### ClickHouse release v21.3.16.5-lts FIXME as compared to v21.3.15.4-stable + +#### Bug Fix +* Backported in [#26940](https://github.com/ClickHouse/ClickHouse/issues/26940): Do not remove data on ReplicatedMergeTree table shutdown to avoid creating data to metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#26983](https://github.com/ClickHouse/ClickHouse/issues/26983): Aggregate function parameters might be lost when applying some combinators causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed. 
Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26998](https://github.com/ClickHouse/ClickHouse/issues/26998): Fix reading of custom TLDs (stops processing with lower buffer or bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27088](https://github.com/ClickHouse/ClickHouse/issues/27088): Now partition ID in queries like `ALTER TABLE ... PARTITION ID xxx` validates for correctness. Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)). +* Backported in [#27049](https://github.com/ClickHouse/ClickHouse/issues/27049): [RFC] Fix possible mutation stack due to race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27158](https://github.com/ClickHouse/ClickHouse/issues/27158): Fix synchronization in GRPCServer This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27368](https://github.com/ClickHouse/ClickHouse/issues/27368): - Fix uninitialized memory in functions `multiSearch*` with empty array, close [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). [#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#27264](https://github.com/ClickHouse/ClickHouse/issues/27264): In rare cases `system.detached_parts` table might contain incorrect information for some parts, it's fixed. 
Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27413](https://github.com/ClickHouse/ClickHouse/issues/27413): Fixed incorrect validation of partition id for MergeTree tables that created with old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27645](https://github.com/ClickHouse/ClickHouse/issues/27645): Fix incorrect result for query with row-level security, prewhere and LowCardinality filter. Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179). [#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27473](https://github.com/ClickHouse/ClickHouse/issues/27473): fix metric BackgroundMessageBrokerSchedulePoolTask, maybe mistyped。. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)). +* Backported in [#27864](https://github.com/ClickHouse/ClickHouse/issues/27864): Prevent crashes for some formats when NULL (tombstone) message was coming from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)). +* Backported in [#28207](https://github.com/ClickHouse/ClickHouse/issues/28207): Fix cases, when read buffer fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([Filatenkov Artur](https://github.com/FArthur-cmd)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#28075](https://github.com/ClickHouse/ClickHouse/issues/28075): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28181](https://github.com/ClickHouse/ClickHouse/issues/28181): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). +* Backported in [#28293](https://github.com/ClickHouse/ClickHouse/issues/28293): Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v21.3.17.2-lts.md b/docs/changelogs/v21.3.17.2-lts.md new file mode 100644 index 00000000000..b6b48ddab61 --- /dev/null +++ b/docs/changelogs/v21.3.17.2-lts.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.3.17.2-lts FIXME as compared to v21.3.16.5-lts + +#### Bug Fix +* Backported in [#28647](https://github.com/ClickHouse/ClickHouse/issues/28647): Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). 
+ +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28569](https://github.com/ClickHouse/ClickHouse/issues/28569): Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). +* Backported in [#28857](https://github.com/ClickHouse/ClickHouse/issues/28857): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). +* Backported in [#28995](https://github.com/ClickHouse/ClickHouse/issues/28995): Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28929](https://github.com/ClickHouse/ClickHouse/issues/28929): Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.3.18.4-lts.md b/docs/changelogs/v21.3.18.4-lts.md new file mode 100644 index 00000000000..612b3660e3b --- /dev/null +++ b/docs/changelogs/v21.3.18.4-lts.md @@ -0,0 +1,22 @@ +### ClickHouse release v21.3.18.4-lts FIXME as compared to v21.3.17.2-lts + +#### Improvement +* Backported in [#30355](https://github.com/ClickHouse/ClickHouse/issues/30355): Use separate `clickhouse-bridge` group and user for bridge processes. Set oom_score_adj so the bridges will be first subjects for OOM killer. Set maximum RSS to 1 GiB.
Closes [#23861](https://github.com/ClickHouse/ClickHouse/issues/23861). [#25280](https://github.com/ClickHouse/ClickHouse/pull/25280) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29945](https://github.com/ClickHouse/ClickHouse/issues/29945): Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#30041](https://github.com/ClickHouse/ClickHouse/issues/30041): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29260](https://github.com/ClickHouse/ClickHouse/issues/29260): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#29026](https://github.com/ClickHouse/ClickHouse/issues/29026): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in single threads since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29194](https://github.com/ClickHouse/ClickHouse/issues/29194): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Backported in [#29360](https://github.com/ClickHouse/ClickHouse/issues/29360): Fix possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of table columns have default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#29300](https://github.com/ClickHouse/ClickHouse/issues/29300): Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30042](https://github.com/ClickHouse/ClickHouse/issues/30042): Condition in filter predicate could be lost after push-down optimisation. [#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29848](https://github.com/ClickHouse/ClickHouse/issues/29848): Fix concurrent access to `LowCardinality` during `GROUP BY` (leads to SIGSEGV). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30066](https://github.com/ClickHouse/ClickHouse/issues/30066): Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)). +* Backported in [#30332](https://github.com/ClickHouse/ClickHouse/issues/30332): Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#30379](https://github.com/ClickHouse/ClickHouse/issues/30379): fix replaceRegexpAll bug. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)).
+ diff --git a/docs/changelogs/v21.3.19.1-lts.md b/docs/changelogs/v21.3.19.1-lts.md new file mode 100644 index 00000000000..6a2be8d6dcb --- /dev/null +++ b/docs/changelogs/v21.3.19.1-lts.md @@ -0,0 +1,26 @@ +### ClickHouse release v21.3.19.1-lts FIXME as compared to v21.3.18.4-lts + +#### Performance Improvement +* Backported in [#31733](https://github.com/ClickHouse/ClickHouse/issues/31733): Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#31577](https://github.com/ClickHouse/ClickHouse/issues/31577): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). +* Backported in [#32347](https://github.com/ClickHouse/ClickHouse/issues/32347): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#30913](https://github.com/ClickHouse/ClickHouse/issues/30913): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#30750](https://github.com/ClickHouse/ClickHouse/issues/30750): Functions for case-insensitive search in UTF8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually do not match, it's fixed.
[#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31038](https://github.com/ClickHouse/ClickHouse/issues/31038): Using `formatRow` function with non-row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31519](https://github.com/ClickHouse/ClickHouse/issues/31519): Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Backported in [#31581](https://github.com/ClickHouse/ClickHouse/issues/31581): Disable `partial_merge_join_left_table_buffer_bytes` before bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#31792](https://github.com/ClickHouse/ClickHouse/issues/31792): Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31816](https://github.com/ClickHouse/ClickHouse/issues/31816): Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31758](https://github.com/ClickHouse/ClickHouse/issues/31758): Fix usage of `Buffer` table engine with type `Map`.
Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#32153](https://github.com/ClickHouse/ClickHouse/issues/32153): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#31894](https://github.com/ClickHouse/ClickHouse/issues/31894): Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32214](https://github.com/ClickHouse/ClickHouse/issues/32214): Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v21.3.2.5-lts.md b/docs/changelogs/v21.3.2.5-lts.md new file mode 100644 index 00000000000..5135f909b7f --- /dev/null +++ b/docs/changelogs/v21.3.2.5-lts.md @@ -0,0 +1,161 @@ +### ClickHouse release v21.3.2.5-lts FIXME as compared to v21.2.1.5869-prestable + +#### Backward Incompatible Change +* Now all case-insensitive function names will be lower-cased during query analysis. This is needed for projection query routing. [#20174](https://github.com/ClickHouse/ClickHouse/pull/20174) ([Amos Bird](https://github.com/amosbird)). +* Now it's not allowed to create MergeTree tables in old syntax with table TTL because it's just ignored. Attach of old tables is still possible. 
[#20282](https://github.com/ClickHouse/ClickHouse/pull/20282) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Add experimental `Replicated` database engine. It replicates DDL queries across multiple hosts. [#16193](https://github.com/ClickHouse/ClickHouse/pull/16193) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Distributed query deduplication is a followup to [#16033](https://github.com/ClickHouse/ClickHouse/issues/16033) and partially resolves the proposal [#13574](https://github.com/ClickHouse/ClickHouse/issues/13574). [#17348](https://github.com/ClickHouse/ClickHouse/pull/17348) ([xjewer](https://github.com/xjewer)). +* Add the ability to backup/restore metadata files for DiskS3. [#18377](https://github.com/ClickHouse/ClickHouse/pull/18377) ([Pavel Kovalenko](https://github.com/Jokser)). +* - Included in the pull request. [#18508](https://github.com/ClickHouse/ClickHouse/pull/18508) ([PHO](https://github.com/depressed-pho)). +* Added file() function to read file as a String. This close [#issue:18851](https://github.com/ClickHouse/ClickHouse/issues/18851). [#19204](https://github.com/ClickHouse/ClickHouse/pull/19204) ([keenwolf](https://github.com/keen-wolf)). +* Tables with `MergeTree*` engine now have two new table-level settings for query concurrency control. Setting `max_concurrent_queries` limits the number of concurrently executed queries which are related to this table. Setting `min_marks_to_honor_max_concurrent_queries` tells to apply previous setting only if query reads at least this number of marks. [#19544](https://github.com/ClickHouse/ClickHouse/pull/19544) ([Amos Bird](https://github.com/amosbird)). +* - Mentioned in [#18454](https://github.com/ClickHouse/ClickHouse/issues/18454) - add function `htmlOrxmlCoarseParse`; - support `` parse; - support `` parse; - support `` parse; - support white space collapse; - support any `` format parse; - HyperScan to support SIMD; - Everything is done in a single pass. 
[#19600](https://github.com/ClickHouse/ClickHouse/pull/19600) ([zlx19950903](https://github.com/zlx19950903)). +* Add quota type QUERY_SELECTS and QUERY_INSERTS. [#19603](https://github.com/ClickHouse/ClickHouse/pull/19603) ([JackyWoo](https://github.com/JackyWoo)). +* ExecutableDictionarySource added implicit_key option. Fixes [#14527](https://github.com/ClickHouse/ClickHouse/issues/14527). [#19677](https://github.com/ClickHouse/ClickHouse/pull/19677) ([Maksim Kita](https://github.com/kitaisreal)). +* Added Server Side Encryption Customer Keys (the `x-amz-server-side-encryption-customer-(key/md5)` header) support in S3 client. See [the link](https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html). Closes [#19428](https://github.com/ClickHouse/ClickHouse/issues/19428). [#19748](https://github.com/ClickHouse/ClickHouse/pull/19748) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Function `reinterpretAs` updated to support big integers. Fixes [#19691](https://github.com/ClickHouse/ClickHouse/issues/19691). [#19858](https://github.com/ClickHouse/ClickHouse/pull/19858) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting `insert_shard_id` to support insert data into specific shard from distributed table. [#19961](https://github.com/ClickHouse/ClickHouse/pull/19961) ([flynn](https://github.com/ucasfl)). +* Added timezoneOffset(datetime) function which will give the offset from UTC in seconds. This closes [#19850](https://github.com/ClickHouse/ClickHouse/issues/19850). [#19962](https://github.com/ClickHouse/ClickHouse/pull/19962) ([keenwolf](https://github.com/keen-wolf)). +* New `event_time_microseconds` column in `system.part_log` table. [#20027](https://github.com/ClickHouse/ClickHouse/pull/20027) ([Bharat Nallan](https://github.com/bharatnc)). +* Add aggregate function `deltaSum` for summing the differences between consecutive rows.
[#20057](https://github.com/ClickHouse/ClickHouse/pull/20057) ([Russ Frank](https://github.com/rf)). +* Add two settings to delay or throw error during insertion when there are too many inactive parts. This is useful when server fails to clean up parts quickly enough. [#20178](https://github.com/ClickHouse/ClickHouse/pull/20178) ([Amos Bird](https://github.com/amosbird)). +* Add file engine settings: `engine_file_empty_if_not_exists` and `engine_file_truncate_on_insert`. [#20620](https://github.com/ClickHouse/ClickHouse/pull/20620) ([M0r64n](https://github.com/M0r64n)). + +#### Performance Improvement +* Add parallel select final for one part with level>0 when `do_not_merge_across_partitions_select_final` setting is 1. [#19375](https://github.com/ClickHouse/ClickHouse/pull/19375) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved performance of bitmap columns during joins. [#19407](https://github.com/ClickHouse/ClickHouse/pull/19407) ([templarzq](https://github.com/templarzq)). +* Partially reimplement HTTP server to make it making less copies of incoming and outgoing data. It gives up to 1.5 performance improvement on inserting long records over HTTP. [#19516](https://github.com/ClickHouse/ClickHouse/pull/19516) ([Ivan](https://github.com/abyss7)). +* Improve performance of aggregate functions by more strict aliasing. [#19946](https://github.com/ClickHouse/ClickHouse/pull/19946) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the case when DataType parser may have exponential complexity (found by fuzzer). This closes [#20096](https://github.com/ClickHouse/ClickHouse/issues/20096). [#20132](https://github.com/ClickHouse/ClickHouse/pull/20132) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `compress` setting for `Memory` tables. If it's enabled the table will use less RAM. On some machines and datasets it can also work faster on SELECT, but it is not always the case. 
This closes [#20093](https://github.com/ClickHouse/ClickHouse/issues/20093). Note: there are reasons why Memory tables can work slower than MergeTree: (1) lack of compression (2) static size of blocks (3) lack of indices and prewhere... [#20168](https://github.com/ClickHouse/ClickHouse/pull/20168) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not squash blocks too much on INSERT SELECT if inserting into Memory table. In previous versions inefficient data representation was created in Memory table after INSERT SELECT. This closes [#13052](https://github.com/ClickHouse/ClickHouse/issues/13052). [#20169](https://github.com/ClickHouse/ClickHouse/pull/20169) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improved performance of aggregation by several fixed size fields (unconfirmed). [#20454](https://github.com/ClickHouse/ClickHouse/pull/20454) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up reading from `Memory` tables in extreme cases (when reading speed is in order of 50 GB/sec) by simplification of pipeline and (consequently) less lock contention in pipeline scheduling. [#20468](https://github.com/ClickHouse/ClickHouse/pull/20468) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of GROUP BY multiple fixed size keys. [#20472](https://github.com/ClickHouse/ClickHouse/pull/20472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The setting `distributed_aggregation_memory_efficient` is enabled by default. It will lower memory usage and improve performance of distributed queries. [#20599](https://github.com/ClickHouse/ClickHouse/pull/20599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better code in aggregation. [#20978](https://github.com/ClickHouse/ClickHouse/pull/20978) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add back intDiv/modulo vectorConstant specializations for better performance.
This fixes [#21293](https://github.com/ClickHouse/ClickHouse/issues/21293) . The regression was introduced in https://github.com/ClickHouse/ClickHouse/pull/18145 . [#21307](https://github.com/ClickHouse/ClickHouse/pull/21307) ([Amos Bird](https://github.com/amosbird)). + +#### Improvement +* Fix creation of `TTL` in cases, when its expression is a function and it is the same as `ORDER BY` key. Now it's allowed to set custom aggregation to primary key columns in `TTL` with `GROUP BY`. Backward incompatible: For primary key columns, which are not in `GROUP BY` and aren't set explicitly now is applied function `any` instead of `max`, when TTL is expired. Also if you use TTL with `WHERE` or `GROUP BY` you can see exceptions at merges, while making rolling update. [#15450](https://github.com/ClickHouse/ClickHouse/pull/15450) ([Anton Popov](https://github.com/CurtizJ)). +* Hedged Requests for remote queries. When setting `use_hedged_requests` enabled (by default), allow to establish many connections with different replicas for query. New connection is enabled in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout` or no data was received within `receive_data_timeout`. Query uses the first connection which send non empty progress packet (or data packet, if `allow_changing_replica_until_first_data_packet`); other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported. [#19291](https://github.com/ClickHouse/ClickHouse/pull/19291) ([Kruglov Pavel](https://github.com/Avogar)). +* Print inline frames for fatal stacktraces. [#19317](https://github.com/ClickHouse/ClickHouse/pull/19317) ([Ivan](https://github.com/abyss7)). +* Do not silently ignore write errors. [#19451](https://github.com/ClickHouse/ClickHouse/pull/19451) ([Azat Khuzhin](https://github.com/azat)). +* Added support for `PREWHERE` when tables have row-level security expressions specified. 
[#19576](https://github.com/ClickHouse/ClickHouse/pull/19576) ([Denis Glazachev](https://github.com/traceon)). +* Add IStoragePolicy interface. [#19608](https://github.com/ClickHouse/ClickHouse/pull/19608) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Add ability to throttle INSERT into Distributed based on amount of pending bytes for async send (`bytes_to_delay_insert`/`max_delay_to_insert` and `bytes_to_throw_insert` settings for `Distributed` engine has been added). [#19673](https://github.com/ClickHouse/ClickHouse/pull/19673) ([Azat Khuzhin](https://github.com/azat)). +* move Conditions that are not related to JOIN to where clause. [#18720](https://github.com/ClickHouse/ClickHouse/issues/18720). [#19685](https://github.com/ClickHouse/ClickHouse/pull/19685) ([hexiaoting](https://github.com/hexiaoting)). +* Add separate config directive for Buffer profile. [#19721](https://github.com/ClickHouse/ClickHouse/pull/19721) ([Azat Khuzhin](https://github.com/azat)). +* Show MaterializeMySQL tables in `system.parts`. [#19770](https://github.com/ClickHouse/ClickHouse/pull/19770) ([Stig Bakken](https://github.com/stigsb)). +* Initialize MaxDDLEntryID to the last value after restarting. Before this PR, MaxDDLEntryID will remain zero until a new DDLTask is processed. [#19924](https://github.com/ClickHouse/ClickHouse/pull/19924) ([Amos Bird](https://github.com/amosbird)). +* Add conversion of block structure for INSERT into Distributed tables if it does not match. [#19947](https://github.com/ClickHouse/ClickHouse/pull/19947) ([Azat Khuzhin](https://github.com/azat)). +* If user calls `JSONExtract` function with `Float32` type requested, allow inaccurate conversion to the result type. For example the number `0.1` in JSON is double precision and is not representable in Float32, but the user still wants to get it. Previous versions return 0 for non-Nullable type and NULL for Nullable type to indicate that conversion is imprecise. 
The logic was 100% correct but it was surprising to users and leading to questions. This closes [#13962](https://github.com/ClickHouse/ClickHouse/issues/13962). [#19960](https://github.com/ClickHouse/ClickHouse/pull/19960) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The value of MYSQL_OPT_RECONNECT option can now be controlled by "opt_reconnect" parameter in the config section of mysql replica. [#19998](https://github.com/ClickHouse/ClickHouse/pull/19998) ([Alexander Kazakov](https://github.com/Akazz)). +* Return `DiskType` instead of `String` in IDisk::getType() as in the rest of storage interfaces. [#19999](https://github.com/ClickHouse/ClickHouse/pull/19999) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* Fix data race in executable dictionary that was possible only on misuse (when the script returns data ignoring its input). [#20045](https://github.com/ClickHouse/ClickHouse/pull/20045) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Show full details of MaterializeMySQL tables in `system.tables`. [#20051](https://github.com/ClickHouse/ClickHouse/pull/20051) ([Stig Bakken](https://github.com/stigsb)). +* Supports system.zookeeper path IN query. [#20105](https://github.com/ClickHouse/ClickHouse/pull/20105) ([小路](https://github.com/nicelulu)). +* 1. SHOW TABLES is now considered as one query in the quota calculations, not two queries. 2. SYSTEM queries now consume quota. 3. Fix calculation of interval's end in quota consumption. [#20106](https://github.com/ClickHouse/ClickHouse/pull/20106) ([Vitaly Baranov](https://github.com/vitlibar)). +* - Fix toDateTime64(toDate()/toDateTime()) for DateTime64 - Implement DateTime64 clamping to match DateTime behaviour. [#20131](https://github.com/ClickHouse/ClickHouse/pull/20131) ([Azat Khuzhin](https://github.com/azat)). 
+* The setting `access_management` is now configurable on startup by providing `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT`, defaults to disabled (`0`) which was the prior value. [#20139](https://github.com/ClickHouse/ClickHouse/pull/20139) ([Marquitos](https://github.com/sonirico)). +* Updated `CacheDictionary`, `ComplexCacheDictionary`, `SSDCacheDictionary`, `SSDComplexKeyDictionary` to use LRUHashMap as underlying index. [#20164](https://github.com/ClickHouse/ClickHouse/pull/20164) ([Maksim Kita](https://github.com/kitaisreal)). +* Support all native integer types in bitmap functions. [#20171](https://github.com/ClickHouse/ClickHouse/pull/20171) ([Amos Bird](https://github.com/amosbird)). +* Normalize count(constant), sum(1) to count(). This is needed for projection query routing. [#20175](https://github.com/ClickHouse/ClickHouse/pull/20175) ([Amos Bird](https://github.com/amosbird)). +* Perform algebraic optimizations of arithmetic expressions inside `avg` aggregate function. close [#20092](https://github.com/ClickHouse/ClickHouse/issues/20092). [#20183](https://github.com/ClickHouse/ClickHouse/pull/20183) ([flynn](https://github.com/ucasfl)). +* Lockless `SYSTEM FLUSH DISTRIBUTED`. [#20215](https://github.com/ClickHouse/ClickHouse/pull/20215) ([Azat Khuzhin](https://github.com/azat)). +* Implicit conversion from integer to Decimal type might succeed if integer value does not fit into Decimal type. Now it throws `ARGUMENT_OUT_OF_BOUND`. [#20232](https://github.com/ClickHouse/ClickHouse/pull/20232) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not allow early constant folding of explicitly forbidden functions. [#20303](https://github.com/ClickHouse/ClickHouse/pull/20303) ([Azat Khuzhin](https://github.com/azat)). +* Make FQDN and other DNS related functions work correctly in alpine images. [#20336](https://github.com/ClickHouse/ClickHouse/pull/20336) ([filimonov](https://github.com/filimonov)).
+* Fixed race between execution of distributed DDL tasks and cleanup of DDL queue. Now DDL task cannot be removed from ZooKeeper if there are active workers. Fixes [#20016](https://github.com/ClickHouse/ClickHouse/issues/20016). [#20448](https://github.com/ClickHouse/ClickHouse/pull/20448) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improved serialization for data types combined of Arrays and Tuples. Improved matching enum data types to protobuf enum type. Fixed serialization of the `Map` data type. Omitted values are now set by default. [#20506](https://github.com/ClickHouse/ClickHouse/pull/20506) ([Vitaly Baranov](https://github.com/vitlibar)). +* https://github.com/ClickHouse/ClickHouse/issues/20576. [#20596](https://github.com/ClickHouse/ClickHouse/pull/20596) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Function 'reinterpretAs(x, Type)' renamed into 'reinterpret(x, Type)'. [#20611](https://github.com/ClickHouse/ClickHouse/pull/20611) ([Maksim Kita](https://github.com/kitaisreal)). +* When loading config for mysql source ClickHouse will now randomize the list of replicas with the same priority to ensure the round-robin logics of picking mysql endpoint. This closes [#20629](https://github.com/ClickHouse/ClickHouse/issues/20629). [#20632](https://github.com/ClickHouse/ClickHouse/pull/20632) ([Alexander Kazakov](https://github.com/Akazz)). +* Do only merging of sorted blocks on initiator with distributed_group_by_no_merge. [#20882](https://github.com/ClickHouse/ClickHouse/pull/20882) ([Azat Khuzhin](https://github.com/azat)). +* - Fill only requested columns when querying system.parts & system.parts_columns. Closes [#19570](https://github.com/ClickHouse/ClickHouse/issues/19570). ... [#21035](https://github.com/ClickHouse/ClickHouse/pull/21035) ([Anmol Arora](https://github.com/anmolarora)). 
+* Usability improvement: more consistent `DateTime64` parsing: recognize the case when unix timestamp with subsecond resolution is specified as scaled integer (like `1111111111222` instead of `1111111111.222`). This closes [#13194](https://github.com/ClickHouse/ClickHouse/issues/13194). [#21053](https://github.com/ClickHouse/ClickHouse/pull/21053) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* MySQL dictionary source will now retry unexpected connection failures (Lost connection to MySQL server during query) which sometimes happen on SSL/TLS connections. [#21237](https://github.com/ClickHouse/ClickHouse/pull/21237) ([Alexander Kazakov](https://github.com/Akazz)). +* Forbid to drop a column if it's referenced by materialized view. Closes [#21164](https://github.com/ClickHouse/ClickHouse/issues/21164). [#21303](https://github.com/ClickHouse/ClickHouse/pull/21303) ([flynn](https://github.com/ucasfl)). +* Provide better compatibility for mysql clients. 1. mysql jdbc 2. mycli. [#21367](https://github.com/ClickHouse/ClickHouse/pull/21367) ([Amos Bird](https://github.com/amosbird)). +* Case-insensitive compression methods for table functions. Also fixed `LZMA` compression method which was checked in upper case. [#21416](https://github.com/ClickHouse/ClickHouse/pull/21416) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Bug Fix +* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Fix a bug that moving pieces to destination table may fail in case of launching multiple clickhouse-copiers. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)). +* Fix clickhouse-client abort exception while executing only `select`.
[#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([李扬](https://github.com/taiyang-li)). +* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). +* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). +* MaterializeMySQL: Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). +* Fix the case when calculating modulo of division of negative number by small divisor, the resulting data type was not large enough to accommodate the negative result. 
This closes [#20052](https://github.com/ClickHouse/ClickHouse/issues/20052). [#20067](https://github.com/ClickHouse/ClickHouse/pull/20067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* * Bugfix in StorageJoin. [#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([Vladimir C](https://github.com/vdimir)). +* The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). +* fix toMinute function to handle special timezone correctly. [#20149](https://github.com/ClickHouse/ClickHouse/pull/20149) ([keenwolf](https://github.com/keen-wolf)). +* Fixes [#19314](https://github.com/ClickHouse/ClickHouse/issues/19314). [#20156](https://github.com/ClickHouse/ClickHouse/pull/20156) ([Ivan](https://github.com/abyss7)). +* Fix CTE when using in INSERT SELECT. This fixes [#20187](https://github.com/ClickHouse/ClickHouse/issues/20187), fixes [#20195](https://github.com/ClickHouse/ClickHouse/issues/20195). [#20211](https://github.com/ClickHouse/ClickHouse/pull/20211) ([Amos Bird](https://github.com/amosbird)). +* Fix rare server crash on config reload during the shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)). +* Fix exception during vertical merge for `MergeTree` table engines family which don't allow to perform vertical merges. Fixes [#20259](https://github.com/ClickHouse/ClickHouse/issues/20259). 
[#20279](https://github.com/ClickHouse/ClickHouse/pull/20279) ([alesapin](https://github.com/alesapin)). +* Fixed the behavior when in case of broken JSON we tried to read the whole file into memory which leads to exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Restrict to `DROP` or `RENAME` version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). +* Fix too often retries of failed background tasks for `ReplicatedMergeTree` table engines family. This could lead to too verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)). +* Fix incorrect result of binary operations between two constant decimals of different scale. Fixes [#20283](https://github.com/ClickHouse/ClickHouse/issues/20283). [#20339](https://github.com/ClickHouse/ClickHouse/pull/20339) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). +* Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). +* Check if table function `view` is used in expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)). +* Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. 
[#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)). +* Fix abnormal server termination when http client goes away. [#20464](https://github.com/ClickHouse/ClickHouse/pull/20464) ([Azat Khuzhin](https://github.com/azat)). +* Fix infinite loop when propagating WITH aliases to subqueries. This fixes [#20388](https://github.com/ClickHouse/ClickHouse/issues/20388). [#20476](https://github.com/ClickHouse/ClickHouse/pull/20476) ([Amos Bird](https://github.com/amosbird)). +* Fix function `transform` not working properly for floating point keys. Closes [#20460](https://github.com/ClickHouse/ClickHouse/issues/20460). [#20479](https://github.com/ClickHouse/ClickHouse/pull/20479) ([flynn](https://github.com/ucasfl)). +* Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV). [#20498](https://github.com/ClickHouse/ClickHouse/pull/20498) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash which could happen if unknown packet was received from remote query (was introduced in [#17868](https://github.com/ClickHouse/ClickHouse/issues/17868)). [#20547](https://github.com/ClickHouse/ClickHouse/pull/20547) ([Azat Khuzhin](https://github.com/azat)). +* Fix the number of threads for scalar subqueries and subqueries for index (after [#19007](https://github.com/ClickHouse/ClickHouse/issues/19007) single thread was always used). Fixes [#20457](https://github.com/ClickHouse/ClickHouse/issues/20457), [#20512](https://github.com/ClickHouse/ClickHouse/issues/20512). [#20550](https://github.com/ClickHouse/ClickHouse/pull/20550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed inconsistent behavior of dictionary in case of queries where we look for absent keys in dictionary. [#20578](https://github.com/ClickHouse/ClickHouse/pull/20578) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix subquery with union distinct and limit clause. 
close [#20597](https://github.com/ClickHouse/ClickHouse/issues/20597). [#20610](https://github.com/ClickHouse/ClickHouse/pull/20610) ([flynn](https://github.com/ucasfl)). +* Backported in [#21571](https://github.com/ClickHouse/ClickHouse/issues/21571): `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). +* `USE database;` query did not work when using MySQL 5.7 client to connect to ClickHouse server, it's fixed. Fixes [#18926](https://github.com/ClickHouse/ClickHouse/issues/18926). [#20878](https://github.com/ClickHouse/ClickHouse/pull/20878) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 'Empty task was returned from async task queue' on query cancellation. [#20881](https://github.com/ClickHouse/ClickHouse/pull/20881) ([Azat Khuzhin](https://github.com/azat)). +* Closes [#9969](https://github.com/ClickHouse/ClickHouse/issues/9969). Fixed Brotli http compression error, which reproduced for large data sizes, slightly complicated structure and with json output format. Update Brotli to the latest version to include the "fix rare access to uninitialized data in ring-buffer". [#20991](https://github.com/ClickHouse/ClickHouse/pull/20991) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed behaviour, when `ALTER MODIFY COLUMN` created mutation, that will knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). +* - Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([Vladimir C](https://github.com/vdimir)). 
+* Out of bound memory access was possible when formatting specifically crafted out of range value of type `DateTime64`. This closes [#20494](https://github.com/ClickHouse/ClickHouse/issues/20494). This closes [#20543](https://github.com/ClickHouse/ClickHouse/issues/20543). [#21023](https://github.com/ClickHouse/ClickHouse/pull/21023) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix default_replica_path and default_replica_name values are useless on Replicated(*)MergeTree engine when the engine needs specify other parameters. [#21060](https://github.com/ClickHouse/ClickHouse/pull/21060) ([mxzlxy](https://github.com/mxzlxy)). +* Fix type mismatch issue when using LowCardinality keys in joinGet. This fixes [#21114](https://github.com/ClickHouse/ClickHouse/issues/21114). [#21117](https://github.com/ClickHouse/ClickHouse/pull/21117) ([Amos Bird](https://github.com/amosbird)). +* Fix the metadata leak when the Replicated*MergeTree with custom (non default) ZooKeeper cluster is dropped. [#21119](https://github.com/ClickHouse/ClickHouse/pull/21119) ([fastio](https://github.com/fastio)). +* fix bug related to cast tuple to map. Closes [#21029](https://github.com/ClickHouse/ClickHouse/issues/21029). [#21120](https://github.com/ClickHouse/ClickHouse/pull/21120) ([hexiaoting](https://github.com/hexiaoting)). +* Fix `input_format_null_as_default` take effective when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116) . [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)). +* Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed bug that could cause duplicates with insert query (if one of the callbacks came a little too late). [#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Now mutations allowed only for table engines that support them (MergeTree family, Memory, MaterializedView). Other engines will report a more clear error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)). +* Fix crash in `EXPLAIN` for query with `UNION`. Fixes [#20876](https://github.com/ClickHouse/ClickHouse/issues/20876), [#21170](https://github.com/ClickHouse/ClickHouse/issues/21170). [#21246](https://github.com/ClickHouse/ClickHouse/pull/21246) ([flynn](https://github.com/ucasfl)). +* Fix bug with `join_use_nulls` and joining `TOTALS` from subqueries. This closes [#19362](https://github.com/ClickHouse/ClickHouse/issues/19362) and [#21137](https://github.com/ClickHouse/ClickHouse/issues/21137). [#21248](https://github.com/ClickHouse/ClickHouse/pull/21248) ([Vladimir C](https://github.com/vdimir)). +* Fix redundant reconnects to ZooKeeper and the possibility of two active sessions for a single clickhouse server. Both problems introduced in [#14678](https://github.com/ClickHouse/ClickHouse/issues/14678). [#21264](https://github.com/ClickHouse/ClickHouse/pull/21264) ([alesapin](https://github.com/alesapin)). +* Now `ALTER MODIFY COLUMN` queries will correctly affect changes in partition key, skip indices, TTLs, and so on. Fixes [#13675](https://github.com/ClickHouse/ClickHouse/issues/13675). [#21334](https://github.com/ClickHouse/ClickHouse/pull/21334) ([alesapin](https://github.com/alesapin)). +* Fix error `Bad cast from type ... to DB::ColumnLowCardinality` while inserting into table with `LowCardinality` column from `Values` format. Fixes [#21140](https://github.com/ClickHouse/ClickHouse/issues/21140). [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV for distributed queries on failures. 
[#21434](https://github.com/ClickHouse/ClickHouse/pull/21434) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21610](https://github.com/ClickHouse/ClickHouse/issues/21610): Fixed race on SSL object inside SecureSocket in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix a deadlock in `ALTER DELETE` mutations for non replicated MergeTree table engines when the predicate contains the table itself. Fixes [#20558](https://github.com/ClickHouse/ClickHouse/issues/20558). [#21477](https://github.com/ClickHouse/ClickHouse/pull/21477) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement +* Fixed port clash from test_storage_kerberized_hdfs test. [#19974](https://github.com/ClickHouse/ClickHouse/pull/19974) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix some of the issues found by Coverity. See [#19964](https://github.com/ClickHouse/ClickHouse/issues/19964). [#20010](https://github.com/ClickHouse/ClickHouse/pull/20010) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to build ClickHouse with AVX-2 enabled globally. It gives slight performance benefits on modern CPUs. Not recommended for production and will not be supported as official build for now. [#20180](https://github.com/ClickHouse/ClickHouse/pull/20180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add some checks in order to fix the bug https://clickhouse-test-reports.s3.yandex.net/20472/5bdc57004682a5e0236ec630546d20ad752c2fde/stress_test_(thread)/stderr.log. [#20516](https://github.com/ClickHouse/ClickHouse/pull/20516) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Print `stdout` and `stderr` to log when failed to start docker in integration tests. Before this PR there was a very short error message in this case which didn't help to investigate the problems. 
[#20631](https://github.com/ClickHouse/ClickHouse/pull/20631) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove most of `sleep` commands from kafka integration tests, waiting for log events instead. Add test for different compression methods. [#21111](https://github.com/ClickHouse/ClickHouse/pull/21111) ([filimonov](https://github.com/filimonov)). +* Allow to start up with modified binary under gdb. In previous versions, if you set a breakpoint in gdb before start, the server would refuse to start up due to failed integrity check. [#21258](https://github.com/ClickHouse/ClickHouse/pull/21258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Experimental feature + +* Introduce experimental support for window functions, enabled with `allow_experimental_window_functions = 1`. This is a preliminary, alpha-quality implementation that is not suitable for production use and will change in backward-incompatible ways in future releases. Please see [the documentation](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/sql-reference/window-functions/index.md#experimental-window-functions) for the list of supported features. [#20337](https://github.com/ClickHouse/ClickHouse/pull/20337) ([Alexander Kuzmenkov](https://github.com/akuzm)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Fix access control manager destruction order"'. [#20394](https://github.com/ClickHouse/ClickHouse/pull/20394) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Update argmax.md '. [#20625](https://github.com/ClickHouse/ClickHouse/pull/20625) ([Marvin Taschenberger](https://github.com/Taschenbergerm)). 
+ diff --git a/docs/changelogs/v21.3.20.1-lts.md b/docs/changelogs/v21.3.20.1-lts.md new file mode 100644 index 00000000000..ac8c7d2ece2 --- /dev/null +++ b/docs/changelogs/v21.3.20.1-lts.md @@ -0,0 +1,10 @@ +### ClickHouse release v21.3.20.1-lts FIXME as compared to v21.3.19.1-lts + +#### Bug Fix +* Backported in [#32690](https://github.com/ClickHouse/ClickHouse/issues/32690): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). +* Backported in [#33727](https://github.com/ClickHouse/ClickHouse/issues/33727): Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32791](https://github.com/ClickHouse/ClickHouse/issues/32791): Fix crash when `fuzzBits` is used with multiple identical FixedString values. Closes [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). + diff --git a/docs/changelogs/v21.3.3.14-lts.md b/docs/changelogs/v21.3.3.14-lts.md new file mode 100644 index 00000000000..3dbfd7f0a04 --- /dev/null +++ b/docs/changelogs/v21.3.3.14-lts.md @@ -0,0 +1,11 @@ +### ClickHouse release v21.3.3.14-lts FIXME as compared to v21.3.2.5-lts + +#### Bug Fix +* Backported in [#21644](https://github.com/ClickHouse/ClickHouse/issues/21644): Fix that S3 table holds old credentials after config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). 
+* Backported in [#21854](https://github.com/ClickHouse/ClickHouse/issues/21854): Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21687](https://github.com/ClickHouse/ClickHouse/issues/21687): Fix fsync_part_directory for horizontal merge. [#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21796](https://github.com/ClickHouse/ClickHouse/issues/21796): Fix distributed requests cancellation (for example simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21811](https://github.com/ClickHouse/ClickHouse/issues/21811): Fix bug for ReplicatedMerge table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Backported in [#21883](https://github.com/ClickHouse/ClickHouse/issues/21883): Reverted S3 connection pools. [#21737](https://github.com/ClickHouse/ClickHouse/pull/21737) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#21874](https://github.com/ClickHouse/ClickHouse/issues/21874): Fix incorrect query result (and possible crash) which could happen when `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ diff --git a/docs/changelogs/v21.3.4.25-lts.md b/docs/changelogs/v21.3.4.25-lts.md new file mode 100644 index 00000000000..431c498dbed --- /dev/null +++ b/docs/changelogs/v21.3.4.25-lts.md @@ -0,0 +1,18 @@ +### ClickHouse release v21.3.4.25-lts FIXME as compared to v21.3.3.14-lts + +#### Bug Fix +* Backported in [#21928](https://github.com/ClickHouse/ClickHouse/issues/21928): Fix Avro format parsing for Kafka. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). +* Backported in [#22089](https://github.com/ClickHouse/ClickHouse/issues/22089): In case if query has constant `WHERE` condition, and setting `optimize_skip_unused_shards` enabled, all shards may be skipped and query could return incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#21793](https://github.com/ClickHouse/ClickHouse/issues/21793): `std::terminate` was called if there is an error writing data into s3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#21746](https://github.com/ClickHouse/ClickHouse/issues/21746): Start accepting connections after DDLWorker and dictionaries initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21749](https://github.com/ClickHouse/ClickHouse/issues/21749): Fix concurrent `OPTIMIZE` and `DROP` for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21878](https://github.com/ClickHouse/ClickHouse/issues/21878): Fix possible crashes in aggregate functions with combinator Distinct, while using two-level aggregation. This is a follow-up fix of https://github.com/ClickHouse/ClickHouse/pull/18365 . 
Can only reproduced in production env. No test case available yet. cc @CurtizJ. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22051](https://github.com/ClickHouse/ClickHouse/issues/22051): Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#21993](https://github.com/ClickHouse/ClickHouse/issues/21993): Fix wrong `ORDER BY` results when a query contains window functions, and optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#22180](https://github.com/ClickHouse/ClickHouse/issues/22180): Fix reading the HTTP POST request with "multipart/form-data" content type. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)). +* Backported in [#21982](https://github.com/ClickHouse/ClickHouse/issues/21982): Prevent hedged connections overlaps (`Unknown packet 9 from server` error). [#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21980](https://github.com/ClickHouse/ClickHouse/issues/21980): Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#22139](https://github.com/ClickHouse/ClickHouse/issues/22139): The function `decrypt` was lacking a check for the minimal size of data encrypted in AEAD mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22185](https://github.com/ClickHouse/ClickHouse/issues/22185): Fix uncaught exception in InterserverIOHTTPHandler. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22205](https://github.com/ClickHouse/ClickHouse/issues/22205): Use finalize() over next() for nested writers. [#22147](https://github.com/ClickHouse/ClickHouse/pull/22147) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.3.5.42-lts.md b/docs/changelogs/v21.3.5.42-lts.md new file mode 100644 index 00000000000..d8616efd0ff --- /dev/null +++ b/docs/changelogs/v21.3.5.42-lts.md @@ -0,0 +1,27 @@ +### ClickHouse release v21.3.5.42-lts FIXME as compared to v21.3.4.25-lts + +#### Bug Fix +* Backported in [#22335](https://github.com/ClickHouse/ClickHouse/issues/22335): Fix table function `clusterAllReplicas` returns wrong `_shard_num`. close [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasfl)). +* Backported in [#22319](https://github.com/ClickHouse/ClickHouse/issues/22319): Remove unknown columns from joined table in where for queries to external database engines (MySQL, PostgreSQL). close [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), close [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), close [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir C](https://github.com/vdimir)). 
+* Backported in [#22241](https://github.com/ClickHouse/ClickHouse/issues/22241): Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717) , which was introduced in https://github.com/ClickHouse/ClickHouse/pull/18896 . [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22150](https://github.com/ClickHouse/ClickHouse/issues/22150): Better error handling and logging in WriteBufferFromS3. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#22468](https://github.com/ClickHouse/ClickHouse/issues/22468): In rare case, merge for `CollapsingMergeTree` may create granule with `index_granularity + 1` rows. Because of this, internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#22203](https://github.com/ClickHouse/ClickHouse/issues/22203): Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). +* Backported in [#22261](https://github.com/ClickHouse/ClickHouse/issues/22261): Disable `async_socket_for_remote`/`use_hedged_requests` for buggy linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22281](https://github.com/ClickHouse/ClickHouse/issues/22281): Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. 
Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). +* Backported in [#22265](https://github.com/ClickHouse/ClickHouse/issues/22265): Fix the background thread pool name. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)). +* Backported in [#22262](https://github.com/ClickHouse/ClickHouse/issues/22262): Fix error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22316](https://github.com/ClickHouse/ClickHouse/issues/22316): Fix docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)). +* Backported in [#22313](https://github.com/ClickHouse/ClickHouse/issues/22313): Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22366](https://github.com/ClickHouse/ClickHouse/issues/22366): Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). +* Backported in [#22562](https://github.com/ClickHouse/ClickHouse/issues/22562): Do not limit HTTP chunk size. Fixes [#21907](https://github.com/ClickHouse/ClickHouse/issues/21907). [#22322](https://github.com/ClickHouse/ClickHouse/pull/22322) ([Ivan](https://github.com/abyss7)). 
+* Backported in [#22531](https://github.com/ClickHouse/ClickHouse/issues/22531): Buffer overflow (on read) was possible in `tokenbf_v1` full text index. The excessive bytes are not used but the read operation may lead to crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22463](https://github.com/ClickHouse/ClickHouse/issues/22463): Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the resultset contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22507](https://github.com/ClickHouse/ClickHouse/issues/22507): Remove socket from epoll before cancelling packet receiver in HedgedConnections to prevent possible race. I hope it fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#22554](https://github.com/ClickHouse/ClickHouse/issues/22554): Fix bug in partial merge join with `LowCardinality`. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22607](https://github.com/ClickHouse/ClickHouse/issues/22607): Fix deserialization of empty string without newline at end of TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without version update: set `input_format_null_as_default` to zero. It was zero in old versions. 
[#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22576](https://github.com/ClickHouse/ClickHouse/issues/22576): Fix UB by unlocking the rwlock of the TinyLog from the same thread. [#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22651](https://github.com/ClickHouse/ClickHouse/issues/22651): Avoid UB in *Log engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22653](https://github.com/ClickHouse/ClickHouse/issues/22653): Try flush write buffer only if it is initialized. Fixes segfault when client closes connection very early [#22579](https://github.com/ClickHouse/ClickHouse/issues/22579). [#22591](https://github.com/ClickHouse/ClickHouse/pull/22591) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#22681](https://github.com/ClickHouse/ClickHouse/issues/22681): Fix LOGICAL_ERROR for Log with nested types w/o columns in the SELECT clause. [#22654](https://github.com/ClickHouse/ClickHouse/pull/22654) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.3.6.55-lts.md b/docs/changelogs/v21.3.6.55-lts.md new file mode 100644 index 00000000000..68980af6264 --- /dev/null +++ b/docs/changelogs/v21.3.6.55-lts.md @@ -0,0 +1,19 @@ +### ClickHouse release v21.3.6.55-lts FIXME as compared to v21.3.5.42-lts + +#### Improvement +* Backported in [#22811](https://github.com/ClickHouse/ClickHouse/issues/22811): If PODArray was instantiated with element size that is neither a fraction nor a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#22823](https://github.com/ClickHouse/ClickHouse/issues/22823): Add profile event HedgedRequestsChangeReplica, change read data timeout from sec to ms. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#22936](https://github.com/ClickHouse/ClickHouse/issues/22936): Correctly check structure of async distributed blocks. [#22325](https://github.com/ClickHouse/ClickHouse/pull/22325) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Backported in [#22968](https://github.com/ClickHouse/ClickHouse/issues/22968): Fix very rare bug when quorum insert with `quorum_parallel=1` is not really "quorum" because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov)). +* Backported in [#22723](https://github.com/ClickHouse/ClickHouse/issues/22723): Check if table function view is used as a column. This complements https://github.com/ClickHouse/ClickHouse/pull/20350. [#21465](https://github.com/ClickHouse/ClickHouse/pull/21465) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22807](https://github.com/ClickHouse/ClickHouse/issues/22807): Follow-up fix for [#21936](https://github.com/ClickHouse/ClickHouse/issues/21936). Also fixes [#22433](https://github.com/ClickHouse/ClickHouse/issues/22433). [#22518](https://github.com/ClickHouse/ClickHouse/pull/22518) ([Ivan](https://github.com/abyss7)). +* Backported in [#22759](https://github.com/ClickHouse/ClickHouse/issues/22759): Fix usage of function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)). +* Backported in [#22701](https://github.com/ClickHouse/ClickHouse/issues/22701): Fix wait for mutations on several replicas for ReplicatedMergeTree table engines. Previously, mutation/alter query may finish before mutation actually executed on other replicas. 
[#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)). +* Backported in [#22738](https://github.com/ClickHouse/ClickHouse/issues/22738): Fix possible hangs in zk requests in case of OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#22892](https://github.com/ClickHouse/ClickHouse/issues/22892): Fix approx total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22922](https://github.com/ClickHouse/ClickHouse/issues/22922): Fix pushdown of `HAVING` in case, when filter column is used in aggregation. [#22763](https://github.com/ClickHouse/ClickHouse/pull/22763) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#22916](https://github.com/ClickHouse/ClickHouse/issues/22916): LIVE VIEW (experimental feature). Fix possible hanging in concurrent DROP/CREATE of TEMPORARY LIVE VIEW in `TemporaryLiveViewCleaner`, see https://gist.github.com/vzakaznikov/0c03195960fc86b56bfe2bc73a90019e. [#22858](https://github.com/ClickHouse/ClickHouse/pull/22858) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#22920](https://github.com/ClickHouse/ClickHouse/issues/22920): Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+ diff --git a/docs/changelogs/v21.3.7.62-stable.md b/docs/changelogs/v21.3.7.62-stable.md new file mode 100644 index 00000000000..df919be0f42 --- /dev/null +++ b/docs/changelogs/v21.3.7.62-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.3.7.62-stable FIXME as compared to v21.3.6.55-lts + +#### Improvement +* Backported in [#23014](https://github.com/ClickHouse/ClickHouse/issues/23014): Set `background_fetches_pool_size` to 8 that is better for production usage with frequent small insertions or slow ZooKeeper cluster. [#22945](https://github.com/ClickHouse/ClickHouse/pull/22945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23081](https://github.com/ClickHouse/ClickHouse/issues/23081): Raised the threshold on max number of matches in result of the function `extractAllGroupsHorizontal`. [#23036](https://github.com/ClickHouse/ClickHouse/pull/23036) ([Vasily Nemkov](https://github.com/Enmk)). + +#### Bug Fix +* Backported in [#23156](https://github.com/ClickHouse/ClickHouse/issues/23156): Fixed a bug with unlimited wait for auxiliary AWS requests. [#22594](https://github.com/ClickHouse/ClickHouse/pull/22594) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#23033](https://github.com/ClickHouse/ClickHouse/issues/23033): Fix error `Cannot find column in ActionsDAG result` which may happen if subquery uses `untuple`. Fixes [#22290](https://github.com/ClickHouse/ClickHouse/issues/22290). [#22991](https://github.com/ClickHouse/ClickHouse/pull/22991) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23075](https://github.com/ClickHouse/ClickHouse/issues/23075): Remove non-essential details from suggestions in clickhouse-client. This closes [#22158](https://github.com/ClickHouse/ClickHouse/issues/22158). [#23040](https://github.com/ClickHouse/ClickHouse/pull/23040) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#23170](https://github.com/ClickHouse/ClickHouse/issues/23170): Some values were formatted with alignment in center in table cells in `Markdown` format. Not anymore. [#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.3.8.76-lts.md b/docs/changelogs/v21.3.8.76-lts.md new file mode 100644 index 00000000000..2002d9e3e1f --- /dev/null +++ b/docs/changelogs/v21.3.8.76-lts.md @@ -0,0 +1,21 @@ +### ClickHouse release v21.3.8.76-lts FIXME as compared to v21.3.7.62-stable + +#### Improvement +* Backported in [#23396](https://github.com/ClickHouse/ClickHouse/issues/23396): If tuple of NULLs, e.g. `(NULL, NULL)` is on the left hand side of `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))` return 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23272](https://github.com/ClickHouse/ClickHouse/issues/23272): Disable settings `use_hedged_requests` and `async_socket_for_remote` because there is an evidence that it may cause issues. [#23261](https://github.com/ClickHouse/ClickHouse/pull/23261) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#23232](https://github.com/ClickHouse/ClickHouse/issues/23232): Server might fail to start if `data_type_default_nullable` setting is enabled in default profile, it's fixed. Fixes [#22573](https://github.com/ClickHouse/ClickHouse/issues/22573). 
[#23185](https://github.com/ClickHouse/ClickHouse/pull/23185) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23406](https://github.com/ClickHouse/ClickHouse/issues/23406): QueryAliasVisitor to prefer alias for ASTWithAlias if subquery was optimized to constant. Fixes [#22924](https://github.com/ClickHouse/ClickHouse/issues/22924). Fixes [#10401](https://github.com/ClickHouse/ClickHouse/issues/10401). [#23191](https://github.com/ClickHouse/ClickHouse/pull/23191) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23381](https://github.com/ClickHouse/ClickHouse/issues/23381): Fixed `Not found column` error when selecting from `MaterializeMySQL` with condition on key column. Fixes [#22432](https://github.com/ClickHouse/ClickHouse/issues/22432). [#23200](https://github.com/ClickHouse/ClickHouse/pull/23200) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23284](https://github.com/ClickHouse/ClickHouse/issues/23284): Fixed simple key dictionary from DDL creation if primary key is not first attribute. Fixes [#23236](https://github.com/ClickHouse/ClickHouse/issues/23236). [#23262](https://github.com/ClickHouse/ClickHouse/pull/23262) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23450](https://github.com/ClickHouse/ClickHouse/issues/23450): Fixed very rare (distributed) race condition between creation and removal of ReplicatedMergeTree tables. It might cause exceptions like `node doesn't exist` on attempt to create replicated table. Fixes [#21419](https://github.com/ClickHouse/ClickHouse/issues/21419). [#23294](https://github.com/ClickHouse/ClickHouse/pull/23294) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23500](https://github.com/ClickHouse/ClickHouse/issues/23500): Fix possible crash in case if `unknown packet` was received from remote query (with `async_socket_for_remote` enabled). 
Maybe fixes [#21167](https://github.com/ClickHouse/ClickHouse/issues/21167). [#23309](https://github.com/ClickHouse/ClickHouse/pull/23309) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23468](https://github.com/ClickHouse/ClickHouse/issues/23468): `ORDER BY` with `COLLATE` was not working correctly if the column is in primary key (or is a monotonic function of it) and the setting `optimize_read_in_order` is not turned off. This closes [#22379](https://github.com/ClickHouse/ClickHouse/issues/22379). Workaround for older versions: turn the setting `optimize_read_in_order` off. [#23375](https://github.com/ClickHouse/ClickHouse/pull/23375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23544](https://github.com/ClickHouse/ClickHouse/issues/23544): Remove support for `argMin` and `argMax` for single `Tuple` argument. The code was not memory-safe. The feature was added by mistake and it is confusing for people. These functions can be reintroduced under different names later. This fixes [#22384](https://github.com/ClickHouse/ClickHouse/issues/22384) and reverts [#17359](https://github.com/ClickHouse/ClickHouse/issues/17359). [#23393](https://github.com/ClickHouse/ClickHouse/pull/23393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23479](https://github.com/ClickHouse/ClickHouse/issues/23479): Kafka storage may support `arrow` and `arrowstream` format messages. [#23415](https://github.com/ClickHouse/ClickHouse/pull/23415) ([Chao Ma](https://github.com/godliness)). +* Backported in [#23499](https://github.com/ClickHouse/ClickHouse/issues/23499): - Bug fix for `deltaSum` aggregate function in counter reset case ... [#23437](https://github.com/ClickHouse/ClickHouse/pull/23437) ([Russ Frank](https://github.com/rf)). 
+* Backported in [#23493](https://github.com/ClickHouse/ClickHouse/issues/23493): Fix bug that does not allow cast from empty array literal, to array with dimensions greater than 1. Closes [#14476](https://github.com/ClickHouse/ClickHouse/issues/14476). [#23456](https://github.com/ClickHouse/ClickHouse/pull/23456) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23536](https://github.com/ClickHouse/ClickHouse/issues/23536): When modify column's default value without datatype, and this column is used as ReplacingMergeTree's parameter like column `b` in the below example, then the server will core dump: ``` CREATE TABLE alter_test (a Int32, b DateTime) ENGINE = ReplacingMergeTree(b) ORDER BY a; ALTER TABLE alter_test MODIFY COLUMN `b` DEFAULT now(); ``` the sever throw error: ``` 2021.04.22 09:48:00.685317 [ 2607 ] {} BaseDaemon: Received signal 11 2021.04.22 09:48:00.686110 [ 2705 ] {} BaseDaemon: ######################################## 2021.04.22 09:48:00.686336 [ 2705 ] {} BaseDaemon: (version 21.6.1.1, build id: 6459E84DFCF8E778546C5AD2FFE91B3AD71E1B1B) (from thread 2619) (no query) Received signal Segmentation fault (11) 2021.04.22 09:48:00.686572 [ 2705 ] {} BaseDaemon: Address: NULL pointer. Access: read. Address not mapped to object. 2021.04.22 09:48:00.686686 [ 2705 ] {} BaseDaemon: Stack trace: 0x1c2585d7 0x1c254f66 0x1bb7e403 0x1bb58923 0x1bb56a85 0x1c6840ef 0x1c691148 0x2061a05c 0x2061a8e4 0x20775a03 0x207722bd 0x20771048 0x7f6e5c25be25 0x7f6e5bd81bad 2021.04.22 09:48:02.283045 [ 2705 ] {} BaseDaemon: 4. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1449: DB::(anonymous namespace)::checkVersionColumnTypesConversion(DB::IDataType const*, DB::IDataType const*, std::__1::basic_string, std::__1::allocator >) @ 0x1c2585d7 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:03.714451 [ 2705 ] {} BaseDaemon: 5. 
/mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1582: DB::MergeTreeData::checkAlterIsPossible(DB::AlterCommands const&, std::__1::shared_ptr) const @ 0x1c254f66 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:04.692949 [ 2705 ] {} BaseDaemon: 6. /mnt/disk4/hewenting/ClickHouse/src/src/Interpreters/InterpreterAlterQuery.cpp:144: DB::InterpreterAlterQuery::execute() @ 0x1bb7e403 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server ```. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#23533](https://github.com/ClickHouse/ClickHouse/issues/23533): Fix `columns` function when multiple joins in select query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)). + diff --git a/docs/changelogs/v21.3.9.83-lts.md b/docs/changelogs/v21.3.9.83-lts.md new file mode 100644 index 00000000000..e437ee4800f --- /dev/null +++ b/docs/changelogs/v21.3.9.83-lts.md @@ -0,0 +1,15 @@ +### ClickHouse release v21.3.9.83-lts FIXME as compared to v21.3.8.76-lts + +#### Improvement +* Backported in [#23680](https://github.com/ClickHouse/ClickHouse/issues/23680): Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in implementation of earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Bug Fix +* Backported in [#23579](https://github.com/ClickHouse/ClickHouse/issues/23579): Fixed very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of deduplication window. 
[#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23677](https://github.com/ClickHouse/ClickHouse/issues/23677): Don't relax NOT conditions during partition pruning. This fixes [#23305](https://github.com/ClickHouse/ClickHouse/issues/23305) and [#21539](https://github.com/ClickHouse/ClickHouse/issues/21539). [#23310](https://github.com/ClickHouse/ClickHouse/pull/23310) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23582](https://github.com/ClickHouse/ClickHouse/issues/23582): Fix bug in dict join with join_algorithm = 'auto'. Close [#23002](https://github.com/ClickHouse/ClickHouse/issues/23002). [#23312](https://github.com/ClickHouse/ClickHouse/pull/23312) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#23585](https://github.com/ClickHouse/ClickHouse/issues/23585): Allow to move more conditions to `PREWHERE` as it was before version 21.1. Insufficient number of moved conditions could lead to worse performance. [#23397](https://github.com/ClickHouse/ClickHouse/pull/23397) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#23592](https://github.com/ClickHouse/ClickHouse/issues/23592): Fixed `Cannot unlink file` error on unsuccessful creation of ReplicatedMergeTree table with multidisk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23590](https://github.com/ClickHouse/ClickHouse/issues/23590): Fix corner cases in vertical merges with `ReplacingMergeTree`. In rare cases they could lead to merge failures with exceptions like `Incomplete granules are not allowed while blocks are granules size`. [#23459](https://github.com/ClickHouse/ClickHouse/pull/23459) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#23612](https://github.com/ClickHouse/ClickHouse/issues/23612): Fix restart / stop command hanging. Closes [#20214](https://github.com/ClickHouse/ClickHouse/issues/20214). [#23552](https://github.com/ClickHouse/ClickHouse/pull/23552) ([filimonov](https://github.com/filimonov)). +* Backported in [#23696](https://github.com/ClickHouse/ClickHouse/issues/23696): Added an exception in case of completely the same values in both samples in aggregate function `mannWhitneyUTest`. This fixes [#23646](https://github.com/ClickHouse/ClickHouse/issues/23646). [#23654](https://github.com/ClickHouse/ClickHouse/pull/23654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + diff --git a/docs/changelogs/v21.4.1.6422-prestable.md b/docs/changelogs/v21.4.1.6422-prestable.md new file mode 100644 index 00000000000..8cd10834fae --- /dev/null +++ b/docs/changelogs/v21.4.1.6422-prestable.md @@ -0,0 +1,173 @@ +### ClickHouse release v21.4.1.6422-prestable FIXME as compared to v21.3.1.6185-prestable + +#### Backward Incompatible Change +* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). +* Column `keys` in table `system.dictionaries` was replaced to columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from `system.dictionaries` table does not require dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning wrong result for 3+ level domains present in custom top-level domain list. 
For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)). +* The `toStartOfIntervalFunction` will align hour intervals to the midnight (in previous versions they were aligned to the start of unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: 00:00:00..10:59:59, 11:00:00..21:59:59 and 22:00:00..23:59:59. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Extended range of `DateTime64` to properly support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). ... [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([Vasily Nemkov](https://github.com/Enmk)). +* - Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)). +* Zero-copy replication for ReplicatedMergeTree over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)). +* Support `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)). +* Supports implicit key type conversion for JOIN. Closes [#18567](https://github.com/ClickHouse/ClickHouse/issues/18567). 
[#19885](https://github.com/ClickHouse/ClickHouse/pull/19885) ([Vladimir C](https://github.com/vdimir)). +* Allow customizing timeouts for http connections used for replication independently from other http timeouts. [#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)). +* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for Nullable type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for multiple attributes fetch with `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `Grant,` `Revoke` and `System` values of `query_kind` column for corresponding queries in `system.query_log` ... [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)). +* Added new SQL command ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'. [#21142](https://github.com/ClickHouse/ClickHouse/pull/21142) ([Pavel Kovalenko](https://github.com/Jokser)). +* Added ExecutablePool dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)). +* - Add function `isIPAddressInRange` to test if an IPv4 or IPv6 address is contained in a given CIDR network prefix. [#21329](https://github.com/ClickHouse/ClickHouse/pull/21329) ([PHO](https://github.com/depressed-pho)). +* Add `_partition_id` virtual column for `MergeTree*` engines. Allow to prune partitions by `_partition_id`. Add `partitionID()` function to calculate partition id string. [#21401](https://github.com/ClickHouse/ClickHouse/pull/21401) ([Amos Bird](https://github.com/amosbird)). 
+* Add new column `slowdowns_count` to `system.clusters`. When using hedged requests, it shows how many times we switched to another replica because this replica was responding slowly. Also show actual value of `errors_count` in `system.clusters`. [#21480](https://github.com/ClickHouse/ClickHouse/pull/21480) ([Kruglov Pavel](https://github.com/Avogar)). +* Add option `--backslash` for clickhouse-format, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasfl)). +* Add new optional clause GRANTEES for CREATE/ALTER USER commands:. [#21641](https://github.com/ClickHouse/ClickHouse/pull/21641) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `ctime` option to `zookeeper-dump-tree`. It allows to dump node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)). +* Functions 'dictGet', 'dictHas' use current database name if it is not specified for dictionaries created with DDL. Closes [#21632](https://github.com/ClickHouse/ClickHouse/issues/21632). [#21859](https://github.com/ClickHouse/ClickHouse/pull/21859) ([Maksim Kita](https://github.com/kitaisreal)). +* Support `Nullable` type for `PolygonDictionary` attribute. [#21890](https://github.com/ClickHouse/ClickHouse/pull/21890) ([Maksim Kita](https://github.com/kitaisreal)). +* Added table function `dictionary`. It works the same way as `Dictionary` engine. Closes [#21560](https://github.com/ClickHouse/ClickHouse/issues/21560). [#21910](https://github.com/ClickHouse/ClickHouse/pull/21910) ([Maksim Kita](https://github.com/kitaisreal)). +* Add function `timezoneOf` that returns the timezone name of `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). 
Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. It is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)). +* Improved performance of `dictGetHierarchy`, `dictIsIn` functions. Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)). +* Added function `dictGetOrNull`. It works like `dictGet`, but returns `Null` in case key was not found in dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Support parallel parsing for `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Improved performance by replacing `memcpy` to another implementation. This closes [#18583](https://github.com/ClickHouse/ClickHouse/issues/18583). [#21520](https://github.com/ClickHouse/ClickHouse/pull/21520) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Supported parallel formatting in clickhouse-local and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Optimize performance of queries like `SELECT ... FINAL ... WHERE`. Now in queries with `FINAL` it's allowed to move to `PREWHERE` columns, which are in sorting key. ... [#21830](https://github.com/ClickHouse/ClickHouse/pull/21830) ([foolchi](https://github.com/foolchi)). +* Faster `GROUP BY` with small `max_rows_to_group_by` and `group_by_overflow_mode='any'`. [#21856](https://github.com/ClickHouse/ClickHouse/pull/21856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid unnecessary data copy when using codec `NONE`. Please note that codec `NONE` is mostly useless - it's recommended to always use compression (`LZ4` is by default). Despite the common belief, disabling compression may not improve performance (the opposite effect is possible). The `NONE` codec is useful in some cases: - when data is uncompressable; - for synthetic benchmarks. [#22145](https://github.com/ClickHouse/ClickHouse/pull/22145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add cache for files read with `min_bytes_to_use_mmap_io` setting. It makes significant (2x and more) performance improvement when the value of the setting is small by avoiding frequent mmap/munmap calls and the consequent page faults. Note that mmap IO has major drawbacks that makes it less reliable in production (e.g. hung or SIGBUS on faulty disks; less controllable memory usage). Nevertheless it is good in benchmarks. 
[#22206](https://github.com/ClickHouse/ClickHouse/pull/22206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable read with mmap IO for file ranges from 64 MiB (the settings `min_bytes_to_use_mmap_io`). It may lead to moderate performance improvement. [#22326](https://github.com/ClickHouse/ClickHouse/pull/22326) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way. [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)). +* Improve performance of aggregation in order of sorting key (with enabled setting `optimize_aggregation_in_order`). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)). +* MaterializeMySQL: add minmax skipping index for _version column. [#20382](https://github.com/ClickHouse/ClickHouse/pull/20382) ([Stig Bakken](https://github.com/stigsb)). +* Do not create empty parts on INSERT when `optimize_on_insert` setting enabled. Fixes [#20304](https://github.com/ClickHouse/ClickHouse/issues/20304). [#20387](https://github.com/ClickHouse/ClickHouse/pull/20387) ([Kruglov Pavel](https://github.com/Avogar)). +* - Support more cases to rewrite `CROSS JOIN` to `INNER JOIN`. [#20392](https://github.com/ClickHouse/ClickHouse/pull/20392) ([Vladimir C](https://github.com/vdimir)). +* MaterializeMySQL: Attempt to reconnect to MySQL if the connection is lost. [#20961](https://github.com/ClickHouse/ClickHouse/pull/20961) ([Håvard Kvålen](https://github.com/havardk)). +* Improve support of integer keys in data type `Map`. [#21157](https://github.com/ClickHouse/ClickHouse/pull/21157) ([Anton Popov](https://github.com/CurtizJ)). 
+* Improve clickhouse-format to not throw exception when there are extra spaces or comment after the last query, and throw exception early with readable message when formatting `ASTInsertQuery` with data. [#21311](https://github.com/ClickHouse/ClickHouse/pull/21311) ([flynn](https://github.com/ucasfl)). +* Age and Precision in graphite rollup configs should increase from retention to retention. Now it's checked and the wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add setting `optimize_skip_unused_shards_limit` to limit the number of sharding key values for `optimize_skip_unused_shards`. [#21512](https://github.com/ClickHouse/ClickHouse/pull/21512) ([Azat Khuzhin](https://github.com/azat)). +* Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes [#21383](https://github.com/ClickHouse/ClickHouse/issues/21383). [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)). +* Add `last_error_time`/`last_error_message`/`last_error_stacktrace`/`remote` columns for `system.errors`. [#21529](https://github.com/ClickHouse/ClickHouse/pull/21529) ([Azat Khuzhin](https://github.com/azat)). +* If PODArray was instantiated with element size that is neither a fraction nor a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Propagate query and session settings for distributed DDL queries. Set `distributed_ddl_entry_format_version` to 2 to enable this. - Added `distributed_ddl_output_mode` setting. Supported modes: `none`, `throw` (default), `null_status_on_timeout` and `never_throw`. - Miscellaneous fixes and improvements for `Replicated` database engine. 
[#21535](https://github.com/ClickHouse/ClickHouse/pull/21535) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update clusters only if their configurations were updated. [#21685](https://github.com/ClickHouse/ClickHouse/pull/21685) ([Kruglov Pavel](https://github.com/Avogar)). +* Support replicas priority for postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). Support non-default table schema for postgres storage/table-function. [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better formatting for `Array` and `Map` data types in Web UI. [#21798](https://github.com/ClickHouse/ClickHouse/pull/21798) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* DiskS3 (experimental feature under development). Fixed bug with the impossibility to move directory if the destination is not empty and cache disk is used. [#21837](https://github.com/ClickHouse/ClickHouse/pull/21837) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add connection pool for PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add profile event HedgedRequestsChangeReplica, change read data timeout from sec to ms. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `RANGE OFFSET` frame for floating point types. Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame. They are identical when the frame is `between unbounded preceding and unbounded following`. This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). 
[#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Show path to data directory of `EmbeddedRocksDB` tables in system tables. [#21903](https://github.com/ClickHouse/ClickHouse/pull/21903) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Supported `replication_alter_partitions_sync=1` setting for moving partitions from helping table to destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([jasong](https://github.com/songenjie)). +* If partition key of a `MergeTree` table does not include `Date` or `DateTime` columns but includes exactly one `DateTime64` column, expose its values in the `min_time` and `max_time` columns in `system.parts` and `system.parts_columns` tables. Add `min_time` and `max_time` columns to `system.parts_columns` table (this was an inconsistency with the `system.parts` table). This closes [#18244](https://github.com/ClickHouse/ClickHouse/issues/18244). [#22011](https://github.com/ClickHouse/ClickHouse/pull/22011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add option `strict_increase` to `windowFunnel` function to calculate each event once (resolve [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835)). [#22025](https://github.com/ClickHouse/ClickHouse/pull/22025) ([Vladimir C](https://github.com/vdimir)). +* Added case insensitive aliases for `CONNECTION_ID()` and `VERSION()` functions. This fixes [#22028](https://github.com/ClickHouse/ClickHouse/issues/22028). [#22042](https://github.com/ClickHouse/ClickHouse/pull/22042) ([Eugene Klimov](https://github.com/Slach)). +* Update used version of simdjson to 0.9.1. This fixes [#21984](https://github.com/ClickHouse/ClickHouse/issues/21984). [#22057](https://github.com/ClickHouse/ClickHouse/pull/22057) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Convert `system.errors.stack_trace` from `String` into `Array(UInt64)` (This should decrease overhead for the errors collecting). [#22058](https://github.com/ClickHouse/ClickHouse/pull/22058) ([Azat Khuzhin](https://github.com/azat)). +* If tuple of NULLs, e.g. `(NULL, NULL)` is on the left hand side of `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))` return 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added possibility to migrate existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add case-insensitive history search/navigation and subword movement features to clickhouse-client. [#22105](https://github.com/ClickHouse/ClickHouse/pull/22105) ([Amos Bird](https://github.com/amosbird)). +* Add `current_database` column to `system.processes` table. It contains the current database of the query. [#22365](https://github.com/ClickHouse/ClickHouse/pull/22365) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix MSan report for function `range` with `UInt256` argument (support for large integers is experimental). This closes [#22157](https://github.com/ClickHouse/ClickHouse/issues/22157). [#22387](https://github.com/ClickHouse/ClickHouse/pull/22387) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error `Directory tmp_fetch_XXX already exists` which could happen after failed fetch part. Delete temporary fetch directory if it already exists. 
Fixes [#14197](https://github.com/ClickHouse/ClickHouse/issues/14197). [#22411](https://github.com/ClickHouse/ClickHouse/pull/22411) ([nvartolomei](https://github.com/nvartolomei)). +* Better exception message in client in case of exception while server is writing blocks. In previous versions client may get misleading message like `Data compressed with different methods`. [#22427](https://github.com/ClickHouse/ClickHouse/pull/22427) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Fixed open behavior of remote host filter in case when there is `remote_url_allow_hosts` section in configuration but no entries there. :warning: please add a note about potential issue when upgrading - @alexey-milovidov. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Close [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir C](https://github.com/vdimir)). +* `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* fix official website documents which introduced cluster secret feature. [#21331](https://github.com/ClickHouse/ClickHouse/pull/21331) ([Chao Ma](https://github.com/godliness)). +* Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Avro format parsing for Kafka. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). [#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). 
+* Fixed race on SSL object inside SecureSocket in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix that S3 table holds old credentials after config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* Fix table function `clusterAllReplicas` returns wrong `_shard_num`. close [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasfl)). +* The ```::poll()``` return ```rc == 1 ```, it could be a request or it could be a response. [#21544](https://github.com/ClickHouse/ClickHouse/pull/21544) ([小路](https://github.com/nicelulu)). +* In case if query has constant `WHERE` condition, and setting `optimize_skip_unused_shards` enabled, all shards may be skipped and query could return incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). +* `std::terminate` was called if there is an error writing data into s3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir C](https://github.com/vdimir)). +* Remove unknown columns from joined table in where for queries to external database engines (MySQL, PostgreSQL). close [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), close [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), close [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). [#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir C](https://github.com/vdimir)). +* Fix fsync_part_directory for horizontal merge. 
[#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)). +* Fix distributed requests cancellation (for example simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)). +* Add type conversion for StorageJoin (previously led to SIGSEGV). [#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)). +* Start accepting connections after DDLWorker and dictionaries initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)). +* Fix SIGSEGV on not existing attributes from ip_trie with access_to_key_from_attributes. [#21692](https://github.com/ClickHouse/ClickHouse/pull/21692) ([Azat Khuzhin](https://github.com/azat)). +* Fix function `arrayElement` with type `Map` for constant integer arguments. [#21699](https://github.com/ClickHouse/ClickHouse/pull/21699) ([Anton Popov](https://github.com/CurtizJ)). +* Fix concurrent `OPTIMIZE` and `DROP` for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug for ReplicatedMerge table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Reverted S3 connection pools. [#21737](https://github.com/ClickHouse/ClickHouse/pull/21737) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix adding of parts with already existing in destination table names in query `MOVE PARTITION TO TABLE` with non-replicated `MergeTree` tables. [#21760](https://github.com/ClickHouse/ClickHouse/pull/21760) ([ygrek](https://github.com/ygrek)). 
+* Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717), which was introduced in https://github.com/ClickHouse/ClickHouse/pull/18896 . [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)). +* Fix possible crashes in aggregate functions with combinator Distinct, while using two-level aggregation. This is a follow-up fix of https://github.com/ClickHouse/ClickHouse/pull/18365 . Can only be reproduced in a production environment. No test case available yet. cc @CurtizJ. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)). +* Better error handling and logging in WriteBufferFromS3. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix incorrect query result (and possible crash) which could happen when `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong `ORDER BY` results when a query contains window functions, and optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix reading the HTTP POST request with "multipart/form-data" content type. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)). +* Prevent hedged connections overlaps (`Unknown packet 9 from server` error). 
[#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)). +* Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)). +* In rare case, merge for `CollapsingMergeTree` may create granule with `index_granularity + 1` rows. Because of this, internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The function `decrypt` was lacking a check for the minimal size of data encrypted in AEAD mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). +* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)). +* Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). 
+* Fix the background thread pool name. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)). +* Fix error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir C](https://github.com/vdimir)). +* Fix docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)). +* Fix uncaught exception in InterserverIOHTTPHandler. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)). +* Use finalize() over next() for nested writers. [#22147](https://github.com/ClickHouse/ClickHouse/pull/22147) ([Azat Khuzhin](https://github.com/azat)). +* Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)). +* Fix exception which may happen when `SELECT` has constant `WHERE` condition and source table has columns which names are digits. [#22270](https://github.com/ClickHouse/ClickHouse/pull/22270) ([LiuNeng](https://github.com/liuneng1994)). +* Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). +* Fixed bug in S3 zero-copy replication for hybrid storage. [#22378](https://github.com/ClickHouse/ClickHouse/pull/22378) ([ianton-ru](https://github.com/ianton-ru)). +* Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the resultset contains very large blocks of data. 
This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove socket from epoll before cancelling packet receiver in HedgedConnections to prevent possible race. I hope it fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). [#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Build/Testing/Packaging Improvement +* Fix macOS shared lib build. [#20184](https://github.com/ClickHouse/ClickHouse/pull/20184) ([nvartolomei](https://github.com/nvartolomei)). +* - Added ALL and NONE privilege tests. - Added ROW POLICY tests. - Cleanup of existing tests. - Tests close faster if something fails. [#21354](https://github.com/ClickHouse/ClickHouse/pull/21354) ([MyroTk](https://github.com/MyroTk)). +* Fixing LDAP authentication performance test by removing assertion. [#21507](https://github.com/ClickHouse/ClickHouse/pull/21507) ([vzakaznikov](https://github.com/vzakaznikov)). +* Updating docker/test/testflows/runner/dockerd-entrypoint.sh to use Yandex dockerhub-proxy. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add llvm-12 binaries name to search in cmake scripts. Implicit constants conversions to mute clang warnings. Updated submodules to build with CMake 3.19. Mute recursion in macro expansion in readpassphrase library. Deprecated -fuse-ld changed to --ld-path for clang. [#21597](https://github.com/ClickHouse/ClickHouse/pull/21597) ([Ilya Yatsishin](https://github.com/qoega)). +* Updating TestFlows to 1.6.74. [#21673](https://github.com/ClickHouse/ClickHouse/pull/21673) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add [jepsen](https://github.com/jepsen-io/jepsen) tests for NuKeeper. 
[#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)). +* remove decode method with python3. [#21832](https://github.com/ClickHouse/ClickHouse/pull/21832) ([kevin wan](https://github.com/MaxWk)). +* Allow to use clang-tidy with release builds by enabling assertions if it is used. [#21914](https://github.com/ClickHouse/ClickHouse/pull/21914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Introduce 2 arguments for clickhouse-server image Dockerfile: deb_location & single_binary_location. [#21977](https://github.com/ClickHouse/ClickHouse/pull/21977) ([filimonov](https://github.com/filimonov)). +* Add `tzdata` to Docker containers because reading ORC formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable status check for SQLancer CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)). +* Run stateless tests in parallel in CI. Depends on [#22181](https://github.com/ClickHouse/ClickHouse/issues/22181). [#22300](https://github.com/ClickHouse/ClickHouse/pull/22300) ([alesapin](https://github.com/alesapin)). +* try fix [#22289](https://github.com/ClickHouse/ClickHouse/issues/22289) https://clickhouse-test-reports.s3.yandex.net/22289/c71da4a5c8e655f4bdfaa33b92ab022b97dfdf1a/integration_tests_(asan).html#fail1 MySQL is started only once with MaterializeMySQL integration test. [#22341](https://github.com/ClickHouse/ClickHouse/pull/22341) ([Winter Zhang](https://github.com/zhang2014)). +* - Added a way to check memory info for the RBAC testflows tests. [#22403](https://github.com/ClickHouse/ClickHouse/pull/22403) ([MyroTk](https://github.com/MyroTk)). +* Fixed compiling on ppc64le and use the correct instruction pointer register on ppc64le. 
[#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix ClickHouse's config embedding and cctz's timezone embedding on ppc64le. ... [#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable building with jemalloc on ppc64le ... [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix Fedora\RHEL\CentOS not finding libclang_rt.builtins on ppc64le ... [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix CMake error about internal CMake variable CMAKE_ASM_COMPILE_OBJECT not set on ppc64le ... [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix compiling boost on ppc64le ... [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable compiling on ppc64le with Clang ... [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Re-enable the S3 (AWS) library on aarch64 ... [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable the bundled openldap on ppc64le ... [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). + +#### Other +* Update tests for hedged requests. [#21998](https://github.com/ClickHouse/ClickHouse/pull/21998) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't set the same timeouts in ReadBufferFromPocoSocket/WriteBufferFromPocoSocket in nextImpl because it causes a race. [#22343](https://github.com/ClickHouse/ClickHouse/pull/22343) ([Kruglov Pavel](https://github.com/Avogar)). 
+ +#### NO CL ENTRY + +* NO CL ENTRY: 'Improve the translation of `query_log.md` in Chinese documents'. [#21729](https://github.com/ClickHouse/ClickHouse/pull/21729) ([Pysaoke](https://github.com/baixuexue123)). +* NO CL ENTRY: 'Update gui.md: add SeekTable'. [#21768](https://github.com/ClickHouse/ClickHouse/pull/21768) ([Vitaliy Fedorchenko](https://github.com/VitaliyMF)). +* NO CL ENTRY: 'Flatten libcpuid PEERDIRs'. [#22078](https://github.com/ClickHouse/ClickHouse/pull/22078) ([Yuriy Chernyshov](https://github.com/georgthegreat)). +* NO CL ENTRY: 'Revert "quick fix for broken resolution of apt.llvm.org on Yandex infra"'. [#22374](https://github.com/ClickHouse/ClickHouse/pull/22374) ([alesapin](https://github.com/alesapin)). + +#### New Feature (datasketches support in clickhouse #14893) + +* Support ThetaSketch to do set operations. [#22207](https://github.com/ClickHouse/ClickHouse/pull/22207) ([Ping Yu](https://github.com/pingyu)). + diff --git a/docs/changelogs/v21.4.2.10-prestable.md b/docs/changelogs/v21.4.2.10-prestable.md new file mode 100644 index 00000000000..1bc440d126c --- /dev/null +++ b/docs/changelogs/v21.4.2.10-prestable.md @@ -0,0 +1,176 @@ +### ClickHouse release v21.4.2.10-prestable FIXME as compared to v21.3.1.6185-prestable + +#### Backward Incompatible Change +* Column `keys` in table `system.dictionaries` was replaced to columns `key.names` and `key.types`. Columns `key.names`, `key.types`, `attribute.names`, `attribute.types` from `system.dictionaries` table does not require dictionary to be loaded. [#21884](https://github.com/ClickHouse/ClickHouse/pull/21884) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `cutToFirstSignificantSubdomainCustom()`/`firstSignificantSubdomainCustom()` returning wrong result for 3+ level domains present in custom top-level domain list. For input domains matching these custom top-level domains, the third-level domain was considered to be the first significant one. This is now fixed. 
This change may introduce incompatibility if the function is used in e.g. the sharding key. [#21946](https://github.com/ClickHouse/ClickHouse/pull/21946) ([Azat Khuzhin](https://github.com/azat)). +* The `toStartOfIntervalFunction` will align hour intervals to the midnight (in previous versions they were aligned to the start of unix epoch). For example, `toStartOfInterval(x, INTERVAL 11 HOUR)` will split every day into three intervals: 00:00:00..10:59:59, 11:00:00..21:59:59 and 22:00:00..23:59:59. This behaviour is more suited for practical needs. This closes [#9510](https://github.com/ClickHouse/ClickHouse/issues/9510). [#22060](https://github.com/ClickHouse/ClickHouse/pull/22060) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Extended range of `DateTime64` to properly support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). ... [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([Vasily Nemkov](https://github.com/Enmk)). +* - Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)). +* Zero-copy replication for ReplicatedMergeTree over S3 storage. [#16240](https://github.com/ClickHouse/ClickHouse/pull/16240) ([ianton-ru](https://github.com/ianton-ru)). +* Support `dictHas` function for `RangeHashedDictionary`. Fixes [#6680](https://github.com/ClickHouse/ClickHouse/issues/6680). [#19816](https://github.com/ClickHouse/ClickHouse/pull/19816) ([Maksim Kita](https://github.com/kitaisreal)). +* Supports implicit key type conversion for JOIN. Closes [#18567](https://github.com/ClickHouse/ClickHouse/issues/18567). [#19885](https://github.com/ClickHouse/ClickHouse/pull/19885) ([Vladimir C](https://github.com/vdimir)). +* Allow customizing timeouts for http connections used for replication independently from other http timeouts. 
[#20088](https://github.com/ClickHouse/ClickHouse/pull/20088) ([nvartolomei](https://github.com/nvartolomei)). +* Added async update in `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for Nullable type in `Cache`, `ComplexKeyCache`, `SSDCache`, `SSDComplexKeyCache` dictionaries. Added support for multiple attributes fetch with `dictGet`, `dictGetOrDefault` functions. Fixes [#21517](https://github.com/ClickHouse/ClickHouse/issues/21517). [#20595](https://github.com/ClickHouse/ClickHouse/pull/20595) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `Grant,` `Revoke` and `System` values of `query_kind` column for corresponding queries in `system.query_log` ... [#21102](https://github.com/ClickHouse/ClickHouse/pull/21102) ([Vasily Nemkov](https://github.com/Enmk)). +* Added new SQL command ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'. [#21142](https://github.com/ClickHouse/ClickHouse/pull/21142) ([Pavel Kovalenko](https://github.com/Jokser)). +* Added ExecutablePool dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)). +* - Add function `isIPAddressInRange` to test if an IPv4 or IPv6 address is contained in a given CIDR network prefix. [#21329](https://github.com/ClickHouse/ClickHouse/pull/21329) ([PHO](https://github.com/depressed-pho)). +* Add `_partition_id` virtual column for `MergeTree*` engines. Allow to prune partitions by `_partition_id`. Add `partitionID()` function to calculate partition id string. [#21401](https://github.com/ClickHouse/ClickHouse/pull/21401) ([Amos Bird](https://github.com/amosbird)). +* Add new column `slowdowns_count` to `system.clusters`. When using hedged requests, it shows how many times we switched to another replica because this replica was responding slowly. 
Also show actual value of `errors_count` in `system.clusters`. [#21480](https://github.com/ClickHouse/ClickHouse/pull/21480) ([Kruglov Pavel](https://github.com/Avogar)). +* Add option `--backslash` for clickhouse-format, which can add a backslash at the end of each line of the formatted query. [#21494](https://github.com/ClickHouse/ClickHouse/pull/21494) ([flynn](https://github.com/ucasfl)). +* Add new optional clause GRANTEES for CREATE/ALTER USER commands. [#21641](https://github.com/ClickHouse/ClickHouse/pull/21641) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `ctime` option to `zookeeper-dump-tree`. It allows dumping node creation time. [#21842](https://github.com/ClickHouse/ClickHouse/pull/21842) ([Ilya](https://github.com/HumanUser)). +* Functions 'dictGet', 'dictHas' use current database name if it is not specified for dictionaries created with DDL. Closes [#21632](https://github.com/ClickHouse/ClickHouse/issues/21632). [#21859](https://github.com/ClickHouse/ClickHouse/pull/21859) ([Maksim Kita](https://github.com/kitaisreal)). +* Support `Nullable` type for `PolygonDictionary` attribute. [#21890](https://github.com/ClickHouse/ClickHouse/pull/21890) ([Maksim Kita](https://github.com/kitaisreal)). +* Added table function `dictionary`. It works the same way as `Dictionary` engine. Closes [#21560](https://github.com/ClickHouse/ClickHouse/issues/21560). [#21910](https://github.com/ClickHouse/ClickHouse/pull/21910) ([Maksim Kita](https://github.com/kitaisreal)). +* Add function `timezoneOf` that returns the timezone name of `DateTime` or `DateTime64` data types. This does not close [#9959](https://github.com/ClickHouse/ClickHouse/issues/9959). Fix inconsistencies in function names: add aliases `timezone` and `timeZone` as well as `toTimezone` and `toTimeZone` and `timezoneOf` and `timeZoneOf`. [#22001](https://github.com/ClickHouse/ClickHouse/pull/22001) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. It is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)). + +#### Performance Improvement +* Support parallel parsing for `CSVWithNames` and `TSVWithNames` formats. This closes [#21085](https://github.com/ClickHouse/ClickHouse/issues/21085). [#21149](https://github.com/ClickHouse/ClickHouse/pull/21149) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improved performance by replacing `memcpy` with another implementation. This closes [#18583](https://github.com/ClickHouse/ClickHouse/issues/18583). [#21520](https://github.com/ClickHouse/ClickHouse/pull/21520) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Supported parallel formatting in clickhouse-local and everywhere else. [#21630](https://github.com/ClickHouse/ClickHouse/pull/21630) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Optimize performance of queries like `SELECT ... FINAL ... WHERE`. Now in queries with `FINAL` it's allowed to move to `PREWHERE` columns, which are in sorting key. ... [#21830](https://github.com/ClickHouse/ClickHouse/pull/21830) ([foolchi](https://github.com/foolchi)). +* Faster `GROUP BY` with small `max_rows_to_group_by` and `group_by_overflow_mode='any'`. [#21856](https://github.com/ClickHouse/ClickHouse/pull/21856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid unnecessary data copy when using codec `NONE`. Please note that codec `NONE` is mostly useless - it's recommended to always use compression (`LZ4` is by default). Despite the common belief, disabling compression may not improve performance (the opposite effect is possible).
The `NONE` codec is useful in some cases: - when data is incompressible; - for synthetic benchmarks. [#22145](https://github.com/ClickHouse/ClickHouse/pull/22145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add cache for files read with `min_bytes_to_use_mmap_io` setting. It makes significant (2x and more) performance improvement when the value of the setting is small by avoiding frequent mmap/munmap calls and the consequent page faults. Note that mmap IO has major drawbacks that make it less reliable in production (e.g. hangs or SIGBUS on faulty disks; less controllable memory usage). Nevertheless it is good in benchmarks. [#22206](https://github.com/ClickHouse/ClickHouse/pull/22206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable read with mmap IO for file ranges from 64 MiB (the setting `min_bytes_to_use_mmap_io`). It may lead to moderate performance improvement. [#22326](https://github.com/ClickHouse/ClickHouse/pull/22326) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Introduce a new merge tree setting `min_bytes_to_rebalance_partition_over_jbod` which allows assigning new parts to different disks of a JBOD volume in a balanced way. [#16481](https://github.com/ClickHouse/ClickHouse/pull/16481) ([Amos Bird](https://github.com/amosbird)). +* Improve performance of aggregation in order of sorting key (with enabled setting `optimize_aggregation_in_order`). [#19401](https://github.com/ClickHouse/ClickHouse/pull/19401) ([Anton Popov](https://github.com/CurtizJ)). +* MaterializeMySQL: add minmax skipping index for _version column. [#20382](https://github.com/ClickHouse/ClickHouse/pull/20382) ([Stig Bakken](https://github.com/stigsb)). +* Do not create empty parts on INSERT when `optimize_on_insert` setting enabled. Fixes [#20304](https://github.com/ClickHouse/ClickHouse/issues/20304). [#20387](https://github.com/ClickHouse/ClickHouse/pull/20387) ([Kruglov Pavel](https://github.com/Avogar)).
+* - Support more cases to rewrite `CROSS JOIN` to `INNER JOIN`. [#20392](https://github.com/ClickHouse/ClickHouse/pull/20392) ([Vladimir C](https://github.com/vdimir)). +* MaterializeMySQL: Attempt to reconnect to MySQL if the connection is lost. [#20961](https://github.com/ClickHouse/ClickHouse/pull/20961) ([Håvard Kvålen](https://github.com/havardk)). +* Improve support of integer keys in data type `Map`. [#21157](https://github.com/ClickHouse/ClickHouse/pull/21157) ([Anton Popov](https://github.com/CurtizJ)). +* Improve clickhouse-format to not throw exception when there are extra spaces or comment after the last query, and throw exception early with readable message when format `ASTInsertQuery` with data . [#21311](https://github.com/ClickHouse/ClickHouse/pull/21311) ([flynn](https://github.com/ucasfl)). +* Age and Precision in graphite rollup configs should increase from retention to retention. Now it's checked and the wrong config raises an exception. [#21496](https://github.com/ClickHouse/ClickHouse/pull/21496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add setting `optimize_skip_unused_shards_limit` to limit the number of sharding key values for `optimize_skip_unused_shards`. [#21512](https://github.com/ClickHouse/ClickHouse/pull/21512) ([Azat Khuzhin](https://github.com/azat)). +* Add `last_error_time`/`last_error_message`/`last_error_stacktrace`/`remote` columns for `system.errors`. [#21529](https://github.com/ClickHouse/ClickHouse/pull/21529) ([Azat Khuzhin](https://github.com/azat)). +* If PODArray was instantiated with element size that is neither a fraction or a multiple of 16, buffer overflow was possible. No bugs in current releases exist. [#21533](https://github.com/ClickHouse/ClickHouse/pull/21533) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Propagate query and session settings for distributed DDL queries. Set `distributed_ddl_entry_format_version` to 2 to enable this. 
- Added `distributed_ddl_output_mode` setting. Supported modes: `none`, `throw` (default), `null_status_on_timeout` and `never_throw`. - Miscellaneous fixes and improvements for `Replicated` database engine. [#21535](https://github.com/ClickHouse/ClickHouse/pull/21535) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update clusters only if their configurations were updated. [#21685](https://github.com/ClickHouse/ClickHouse/pull/21685) ([Kruglov Pavel](https://github.com/Avogar)). +* Support replicas priority for postgres dictionary source. [#21710](https://github.com/ClickHouse/ClickHouse/pull/21710) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Closes [#21701](https://github.com/ClickHouse/ClickHouse/issues/21701). Support non-default table schema for postgres storage/table-function. [#21711](https://github.com/ClickHouse/ClickHouse/pull/21711) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better formatting for `Array` and `Map` data types in Web UI. [#21798](https://github.com/ClickHouse/ClickHouse/pull/21798) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* DiskS3 (experimental feature under development). Fixed bug with the impossibility to move directory if the destination is not empty and cache disk is used. [#21837](https://github.com/ClickHouse/ClickHouse/pull/21837) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add connection pool for PostgreSQL table/database engine and dictionary source. Should fix [#21444](https://github.com/ClickHouse/ClickHouse/issues/21444). [#21839](https://github.com/ClickHouse/ClickHouse/pull/21839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add profile event HedgedRequestsChangeReplica, change read data timeout from sec to ms. [#21886](https://github.com/ClickHouse/ClickHouse/pull/21886) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `RANGE OFFSET` frame for floating point types. 
Implement `lagInFrame`/`leadInFrame` window functions, which are analogous to `lag`/`lead`, but respect the window frame. They are identical when the frame is `between unbounded preceding and unbounded following`. This closes [#5485](https://github.com/ClickHouse/ClickHouse/issues/5485). [#21895](https://github.com/ClickHouse/ClickHouse/pull/21895) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Show path to data directory of `EmbeddedRocksDB` tables in system tables. [#21903](https://github.com/ClickHouse/ClickHouse/pull/21903) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Supported `replication_alter_partitions_sync=1` setting for moving partitions from helping table to destination. Decreased default timeouts. Fixes [#21911](https://github.com/ClickHouse/ClickHouse/issues/21911). [#21912](https://github.com/ClickHouse/ClickHouse/pull/21912) ([jasong](https://github.com/songenjie)). +* If partition key of a `MergeTree` table does not include `Date` or `DateTime` columns but includes exactly one `DateTime64` column, expose its values in the `min_time` and `max_time` columns in `system.parts` and `system.parts_columns` tables. Add `min_time` and `max_time` columns to `system.parts_columns` table (there was an inconsistency with the `system.parts` table). This closes [#18244](https://github.com/ClickHouse/ClickHouse/issues/18244). [#22011](https://github.com/ClickHouse/ClickHouse/pull/22011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add option `strict_increase` to `windowFunnel` function to calculate each event once (resolve [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835)). [#22025](https://github.com/ClickHouse/ClickHouse/pull/22025) ([Vladimir C](https://github.com/vdimir)). +* Added case insensitive aliases for `CONNECTION_ID()` and `VERSION()` functions. This fixes [#22028](https://github.com/ClickHouse/ClickHouse/issues/22028).
[#22042](https://github.com/ClickHouse/ClickHouse/pull/22042) ([Eugene Klimov](https://github.com/Slach)). +* Update used version of simdjson to 0.9.1. This fixes [#21984](https://github.com/ClickHouse/ClickHouse/issues/21984). [#22057](https://github.com/ClickHouse/ClickHouse/pull/22057) ([Vitaly Baranov](https://github.com/vitlibar)). +* Convert `system.errors.stack_trace` from `String` into `Array(UInt64)` (This should decrease overhead for the errors collecting). [#22058](https://github.com/ClickHouse/ClickHouse/pull/22058) ([Azat Khuzhin](https://github.com/azat)). +* If tuple of NULLs, e.g. `(NULL, NULL)` is on the left hand side of `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))` return 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added possibility to migrate existing S3 disk to the schema with backup-restore capabilities. [#22070](https://github.com/ClickHouse/ClickHouse/pull/22070) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add case-insensitive history search/navigation and subword movement features to clickhouse-client. [#22105](https://github.com/ClickHouse/ClickHouse/pull/22105) ([Amos Bird](https://github.com/amosbird)). +* Add `current_database` column to `system.processes` table. It contains the current database of the query. [#22365](https://github.com/ClickHouse/ClickHouse/pull/22365) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix MSan report for function `range` with `UInt256` argument (support for large integers is experimental). 
This closes [#22157](https://github.com/ClickHouse/ClickHouse/issues/22157). [#22387](https://github.com/ClickHouse/ClickHouse/pull/22387) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error `Directory tmp_fetch_XXX already exists` which could happen after failed fetch part. Delete temporary fetch directory if it already exists. Fixes [#14197](https://github.com/ClickHouse/ClickHouse/issues/14197). [#22411](https://github.com/ClickHouse/ClickHouse/pull/22411) ([nvartolomei](https://github.com/nvartolomei)). +* Better exception message in client in case of exception while server is writing blocks. In previous versions client may get misleading message like `Data compressed with different methods`. [#22427](https://github.com/ClickHouse/ClickHouse/pull/22427) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Fixed open behavior of remote host filter in case when there is `remote_url_allow_hosts` section in configuration but no entries there. :warning: please add a note about potential issue when upgrading - @alexey-milovidov. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)). +* `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* fix official website documents which introduced cluster secret feature. [#21331](https://github.com/ClickHouse/ClickHouse/pull/21331) ([Chao Ma](https://github.com/godliness)). +* Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Avro format parsing for Kafka. Fixes [#21437](https://github.com/ClickHouse/ClickHouse/issues/21437). 
[#21438](https://github.com/ClickHouse/ClickHouse/pull/21438) ([Ilya Golshtein](https://github.com/ilejn)). +* Fixed race on SSL object inside SecureSocket in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix that S3 table holds old credentials after config update. [#21457](https://github.com/ClickHouse/ClickHouse/pull/21457) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* Fix table function `clusterAllReplicas` returns wrong `_shard_num`. close [#21481](https://github.com/ClickHouse/ClickHouse/issues/21481). [#21498](https://github.com/ClickHouse/ClickHouse/pull/21498) ([flynn](https://github.com/ucasfl)). +* The ```::poll()``` return ```rc == 1 ```, it could be a request or it could be a response. [#21544](https://github.com/ClickHouse/ClickHouse/pull/21544) ([小路](https://github.com/nicelulu)). +* In case if query has constant `WHERE` condition, and setting `optimize_skip_unused_shards` enabled, all shards may be skipped and query could return incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Fix possible error ` Cannot find column` when `optimize_skip_unused_shards` is enabled and zero shards are used. [#21579](https://github.com/ClickHouse/ClickHouse/pull/21579) ([Azat Khuzhin](https://github.com/azat)). +* `std::terminate` was called if there is an error writing data into s3. [#21624](https://github.com/ClickHouse/ClickHouse/pull/21624) ([Vladimir C](https://github.com/vdimir)). +* Remove unknown columns from joined table in where for queries to external database engines (MySQL, PostgreSQL). close [#14614](https://github.com/ClickHouse/ClickHouse/issues/14614), close [#19288](https://github.com/ClickHouse/ClickHouse/issues/19288) (dup), close [#19645](https://github.com/ClickHouse/ClickHouse/issues/19645) (dup). 
[#21640](https://github.com/ClickHouse/ClickHouse/pull/21640) ([Vladimir C](https://github.com/vdimir)). +* Fix fsync_part_directory for horizontal merge. [#21642](https://github.com/ClickHouse/ClickHouse/pull/21642) ([Azat Khuzhin](https://github.com/azat)). +* Fix distributed requests cancellation (for example simple select from multiple shards with limit, i.e. `select * from remote('127.{2,3}', system.numbers) limit 100`) with `async_socket_for_remote=1`. [#21643](https://github.com/ClickHouse/ClickHouse/pull/21643) ([Azat Khuzhin](https://github.com/azat)). +* Add type conversion for StorageJoin (previously led to SIGSEGV). [#21646](https://github.com/ClickHouse/ClickHouse/pull/21646) ([Azat Khuzhin](https://github.com/azat)). +* Start accepting connections after DDLWorker and dictionaries initialization. [#21676](https://github.com/ClickHouse/ClickHouse/pull/21676) ([Azat Khuzhin](https://github.com/azat)). +* Fix SIGSEGV on not existing attributes from ip_trie with access_to_key_from_attributes. [#21692](https://github.com/ClickHouse/ClickHouse/pull/21692) ([Azat Khuzhin](https://github.com/azat)). +* Fix function `arrayElement` with type `Map` for constant integer arguments. [#21699](https://github.com/ClickHouse/ClickHouse/pull/21699) ([Anton Popov](https://github.com/CurtizJ)). +* Fix concurrent `OPTIMIZE` and `DROP` for `ReplicatedMergeTree`. [#21716](https://github.com/ClickHouse/ClickHouse/pull/21716) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug for ReplicatedMerge table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Reverted S3 connection pools. [#21737](https://github.com/ClickHouse/ClickHouse/pull/21737) ([Vladimir Chebotarev](https://github.com/excitoon)). 
+* Fix adding of parts whose names already exist in the destination table in query `MOVE PARTITION TO TABLE` with non-replicated `MergeTree` tables. [#21760](https://github.com/ClickHouse/ClickHouse/pull/21760) ([ygrek](https://github.com/ygrek)). +* Fix scalar subquery index analysis. This fixes [#21717](https://github.com/ClickHouse/ClickHouse/issues/21717), which was introduced in https://github.com/ClickHouse/ClickHouse/pull/18896 . [#21766](https://github.com/ClickHouse/ClickHouse/pull/21766) ([Amos Bird](https://github.com/amosbird)). +* Fix possible crashes in aggregate functions with combinator Distinct, while using two-level aggregation. This is a follow-up fix of https://github.com/ClickHouse/ClickHouse/pull/18365 . Can only be reproduced in production env. No test case available yet. cc @CurtizJ. [#21818](https://github.com/ClickHouse/ClickHouse/pull/21818) ([Amos Bird](https://github.com/amosbird)). +* Better error handling and logging in WriteBufferFromS3. [#21836](https://github.com/ClickHouse/ClickHouse/pull/21836) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix incorrect query result (and possible crash) which could happen when `WHERE` or `HAVING` condition is pushed before `GROUP BY`. Fixes [#21773](https://github.com/ClickHouse/ClickHouse/issues/21773). [#21841](https://github.com/ClickHouse/ClickHouse/pull/21841) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#22625](https://github.com/ClickHouse/ClickHouse/issues/22625): Fix bug, which leads to underaggregation of data in case of enabled `optimize_aggregation_in_order` and many parts in table. Slightly improve performance of aggregation with enabled `optimize_aggregation_in_order`.
[#21889](https://github.com/ClickHouse/ClickHouse/pull/21889) ([Anton Popov](https://github.com/CurtizJ)). +* Fix wrong `ORDER BY` results when a query contains window functions, and optimization for reading in primary key order is applied. Fixes [#21828](https://github.com/ClickHouse/ClickHouse/issues/21828). [#21915](https://github.com/ClickHouse/ClickHouse/pull/21915) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix reading the HTTP POST request with "multipart/form-data" content type. [#21936](https://github.com/ClickHouse/ClickHouse/pull/21936) ([Ivan](https://github.com/abyss7)). +* Prevent hedged connections overlaps (`Unknown packet 9 from server` error). [#21941](https://github.com/ClickHouse/ClickHouse/pull/21941) ([Azat Khuzhin](https://github.com/azat)). +* Reverted [#15454](https://github.com/ClickHouse/ClickHouse/issues/15454) that may cause significant increase in memory usage while loading external dictionaries of hashed type. This closes [#21935](https://github.com/ClickHouse/ClickHouse/issues/21935). [#21948](https://github.com/ClickHouse/ClickHouse/pull/21948) ([Maksim Kita](https://github.com/kitaisreal)). +* In rare case, merge for `CollapsingMergeTree` may create granule with `index_granularity + 1` rows. Because of this, internal check, added in [#18928](https://github.com/ClickHouse/ClickHouse/issues/18928) (affects 21.2 and 21.3), may fail with error `Incomplete granules are not allowed while blocks are granules size`. This error did not allow parts to merge. [#21976](https://github.com/ClickHouse/ClickHouse/pull/21976) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The function `decrypt` was lacking a check for the minimal size of data encrypted in AEAD mode. This closes [#21897](https://github.com/ClickHouse/ClickHouse/issues/21897). [#22064](https://github.com/ClickHouse/ClickHouse/pull/22064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). +* Disable `async_socket_for_remote`/`use_hedged_requests` for buggy linux kernels. [#22109](https://github.com/ClickHouse/ClickHouse/pull/22109) ([Azat Khuzhin](https://github.com/azat)). +* Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). +* Fix the background thread pool name. [#22122](https://github.com/ClickHouse/ClickHouse/pull/22122) ([fastio](https://github.com/fastio)). +* Fix error `Invalid number of rows in Chunk` in `JOIN` with `TOTALS` and `arrayJoin`. Closes [#19303](https://github.com/ClickHouse/ClickHouse/issues/19303). [#22129](https://github.com/ClickHouse/ClickHouse/pull/22129) ([Vladimir C](https://github.com/vdimir)). +* Fix docker entrypoint in case `http_port` is not in the config. [#22132](https://github.com/ClickHouse/ClickHouse/pull/22132) ([Ewout](https://github.com/devwout)). +* Fix uncaught exception in InterserverIOHTTPHandler. [#22146](https://github.com/ClickHouse/ClickHouse/pull/22146) ([Azat Khuzhin](https://github.com/azat)). +* Use finalize() over next() for nested writers. [#22147](https://github.com/ClickHouse/ClickHouse/pull/22147) ([Azat Khuzhin](https://github.com/azat)). +* Fix query cancellation with `use_hedged_requests=0` and `async_socket_for_remote=1`. [#22183](https://github.com/ClickHouse/ClickHouse/pull/22183) ([Azat Khuzhin](https://github.com/azat)). +* Fix exception which may happen when `SELECT` has constant `WHERE` condition and source table has columns which names are digits. 
[#22270](https://github.com/ClickHouse/ClickHouse/pull/22270) ([LiuNeng](https://github.com/liuneng1994)). +* Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). +* Backported in [#22541](https://github.com/ClickHouse/ClickHouse/issues/22541): Do not limit HTTP chunk size. Fixes [#21907](https://github.com/ClickHouse/ClickHouse/issues/21907). [#22322](https://github.com/ClickHouse/ClickHouse/pull/22322) ([Ivan](https://github.com/abyss7)). +* Fixed bug in S3 zero-copy replication for hybrid storage. [#22378](https://github.com/ClickHouse/ClickHouse/pull/22378) ([ianton-ru](https://github.com/ianton-ru)). +* Backported in [#22532](https://github.com/ClickHouse/ClickHouse/issues/22532): Buffer overflow (on read) was possible in `tokenbf_v1` full text index. The excessive bytes are not used but the read operation may lead to crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add (missing) memory accounting in parallel parsing routines. In previous versions OOM was possible when the resultset contains very large blocks of data. This closes [#22008](https://github.com/ClickHouse/ClickHouse/issues/22008). [#22425](https://github.com/ClickHouse/ClickHouse/pull/22425) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22519](https://github.com/ClickHouse/ClickHouse/issues/22519): Remove socket from epoll before cancelling packet receiver in HedgedConnections to prevent possible race. I hope it fixes [#22161](https://github.com/ClickHouse/ClickHouse/issues/22161). 
[#22443](https://github.com/ClickHouse/ClickHouse/pull/22443) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#22617](https://github.com/ClickHouse/ClickHouse/issues/22617): Fix ClickHouseDictionarySource configuration loop. Closes [#14314](https://github.com/ClickHouse/ClickHouse/issues/14314). [#22479](https://github.com/ClickHouse/ClickHouse/pull/22479) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#22558](https://github.com/ClickHouse/ClickHouse/issues/22558): Fix bug in partial merge join with `LowCardinality`. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22559](https://github.com/ClickHouse/ClickHouse/issues/22559): Fix deserialization of empty string without newline at end of TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without version update: set `input_format_null_as_default` to zero. It was zero in old versions. [#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#22577](https://github.com/ClickHouse/ClickHouse/issues/22577): Fix UB by unlocking the rwlock of the TinyLog from the same thread. [#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22649](https://github.com/ClickHouse/ClickHouse/issues/22649): Avoid UB in *Log engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22652](https://github.com/ClickHouse/ClickHouse/issues/22652): Try flush write buffer only if it is initialized. 
Fixes segfault when client closes connection very early [#22579](https://github.com/ClickHouse/ClickHouse/issues/22579). [#22591](https://github.com/ClickHouse/ClickHouse/pull/22591) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#22680](https://github.com/ClickHouse/ClickHouse/issues/22680): Fix LOGICAL_ERROR for Log with nested types w/o columns in the SELECT clause. [#22654](https://github.com/ClickHouse/ClickHouse/pull/22654) ([Azat Khuzhin](https://github.com/azat)). + +#### Build/Testing/Packaging Improvement +* Fix macOS shared lib build. [#20184](https://github.com/ClickHouse/ClickHouse/pull/20184) ([nvartolomei](https://github.com/nvartolomei)). +* - Added ALL and NONE privilege tests. - Added ROW POLICY tests. - Cleanup of existing tests. - Tests close faster if something fails. [#21354](https://github.com/ClickHouse/ClickHouse/pull/21354) ([MyroTk](https://github.com/MyroTk)). +* Fixing LDAP authentication performance test by removing assertion. [#21507](https://github.com/ClickHouse/ClickHouse/pull/21507) ([vzakaznikov](https://github.com/vzakaznikov)). +* Updating docker/test/testflows/runner/dockerd-entrypoint.sh to use Yandex dockerhub-proxy. [#21551](https://github.com/ClickHouse/ClickHouse/pull/21551) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add llvm-12 binaries name to search in cmake scripts. Implicit constants conversions to mute clang warnings. Updated submodules to build with CMake 3.19. Mute recursion in macro expansion in readpassphrase library. Deprecated -fuse-ld changed to --ld-path for clang. [#21597](https://github.com/ClickHouse/ClickHouse/pull/21597) ([Ilya Yatsishin](https://github.com/qoega)). +* Updating TestFlows to 1.6.74. [#21673](https://github.com/ClickHouse/ClickHouse/pull/21673) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add [jepsen](https://github.com/jepsen-io/jepsen) tests for NuKeeper. 
[#21677](https://github.com/ClickHouse/ClickHouse/pull/21677) ([alesapin](https://github.com/alesapin)). +* Remove decode method with python3. [#21832](https://github.com/ClickHouse/ClickHouse/pull/21832) ([kevin wan](https://github.com/MaxWk)). +* Allow to use clang-tidy with release builds by enabling assertions if it is used. [#21914](https://github.com/ClickHouse/ClickHouse/pull/21914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Introduce 2 arguments for clickhouse-server image Dockerfile: deb_location & single_binary_location. [#21977](https://github.com/ClickHouse/ClickHouse/pull/21977) ([filimonov](https://github.com/filimonov)). +* Add `tzdata` to Docker containers because reading ORC formats requires it. This closes [#14156](https://github.com/ClickHouse/ClickHouse/issues/14156). [#22000](https://github.com/ClickHouse/ClickHouse/pull/22000) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable status check for SQLancer CI run. [#22015](https://github.com/ClickHouse/ClickHouse/pull/22015) ([Ilya Yatsishin](https://github.com/qoega)). +* Try to fix [#22289](https://github.com/ClickHouse/ClickHouse/issues/22289) https://clickhouse-test-reports.s3.yandex.net/22289/c71da4a5c8e655f4bdfaa33b92ab022b97dfdf1a/integration_tests_(asan).html#fail1 MySQL is started only once with MaterializeMySQL integration test. [#22341](https://github.com/ClickHouse/ClickHouse/pull/22341) ([Winter Zhang](https://github.com/zhang2014)). +* Added a way to check memory info for the RBAC testflows tests. [#22403](https://github.com/ClickHouse/ClickHouse/pull/22403) ([MyroTk](https://github.com/MyroTk)). +* Fixed compiling on ppc64le and use the correct instruction pointer register on ppc64le. [#22430](https://github.com/ClickHouse/ClickHouse/pull/22430) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix ClickHouse's config embedding and cctz's timezone embedding on ppc64le. ...
[#22445](https://github.com/ClickHouse/ClickHouse/pull/22445) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable building with jemalloc on ppc64le ... [#22447](https://github.com/ClickHouse/ClickHouse/pull/22447) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix Fedora\RHEL\CentOS not finding libclang_rt.builtins on ppc64le ... [#22458](https://github.com/ClickHouse/ClickHouse/pull/22458) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix CMake error about internal CMake variable CMAKE_ASM_COMPILE_OBJECT not set on ppc64le ... [#22469](https://github.com/ClickHouse/ClickHouse/pull/22469) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Fix compiling boost on ppc64le ... [#22474](https://github.com/ClickHouse/ClickHouse/pull/22474) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Enable compiling on ppc64le with Clang ... [#22476](https://github.com/ClickHouse/ClickHouse/pull/22476) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Re-enable the S3 (AWS) library on aarch64 ... [#22484](https://github.com/ClickHouse/ClickHouse/pull/22484) ([Kfir Itzhak](https://github.com/mastertheknife)). + +#### Other +* Update tests for hedged requests. [#21998](https://github.com/ClickHouse/ClickHouse/pull/21998) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't set the same timeouts in ReadBufferFromPocoSocket/WriteBufferFromPocoSocket in nextImpl because it causes a race. [#22343](https://github.com/ClickHouse/ClickHouse/pull/22343) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Improve the translation of `query_log.md` in Chinese documents'. [#21729](https://github.com/ClickHouse/ClickHouse/pull/21729) ([Pysaoke](https://github.com/baixuexue123)). +* NO CL ENTRY: 'Update gui.md: add SeekTable'. [#21768](https://github.com/ClickHouse/ClickHouse/pull/21768) ([Vitaliy Fedorchenko](https://github.com/VitaliyMF)). +* NO CL ENTRY: 'Flatten libcpuid PEERDIRs'. 
[#22078](https://github.com/ClickHouse/ClickHouse/pull/22078) ([Yuriy Chernyshov](https://github.com/georgthegreat)). +* NO CL ENTRY: 'Revert "quick fix for broken resolution of apt.llvm.org on Yandex infra"'. [#22374](https://github.com/ClickHouse/ClickHouse/pull/22374) ([alesapin](https://github.com/alesapin)). + +#### New Feature (datasketches support in clickhouse #14893) + +* Support ThetaSketch to do set operations. [#22207](https://github.com/ClickHouse/ClickHouse/pull/22207) ([Ping Yu](https://github.com/pingyu)). + diff --git a/docs/changelogs/v21.4.3.21-stable.md b/docs/changelogs/v21.4.3.21-stable.md new file mode 100644 index 00000000000..dc3d7b7005b --- /dev/null +++ b/docs/changelogs/v21.4.3.21-stable.md @@ -0,0 +1,18 @@ +### ClickHouse release v21.4.3.21-stable FIXME as compared to v21.4.2.10-prestable + +#### Improvement +* Backported in [#22934](https://github.com/ClickHouse/ClickHouse/issues/22934): Correctly check structure of async distributed blocks. [#22325](https://github.com/ClickHouse/ClickHouse/pull/22325) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Backported in [#22966](https://github.com/ClickHouse/ClickHouse/issues/22966): Fix very rare bug when quorum insert with `quorum_parallel=1` is not really "quorum" because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov)). +* Backported in [#22722](https://github.com/ClickHouse/ClickHouse/issues/22722): Check if table function view is used as a column. This complements https://github.com/ClickHouse/ClickHouse/pull/20350. [#21465](https://github.com/ClickHouse/ClickHouse/pull/21465) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22808](https://github.com/ClickHouse/ClickHouse/issues/22808): Follow-up fix for [#21936](https://github.com/ClickHouse/ClickHouse/issues/21936). Also fixes [#22433](https://github.com/ClickHouse/ClickHouse/issues/22433). 
[#22518](https://github.com/ClickHouse/ClickHouse/pull/22518) ([Ivan](https://github.com/abyss7)). +* Backported in [#22757](https://github.com/ClickHouse/ClickHouse/issues/22757): Fix usage of function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)). +* Backported in [#22702](https://github.com/ClickHouse/ClickHouse/issues/22702): Fix wait for mutations on several replicas for ReplicatedMergeTree table engines. Previously, mutation/alter query may finish before mutation actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)). +* Backported in [#22741](https://github.com/ClickHouse/ClickHouse/issues/22741): Fix possible hangs in zk requests in case of OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#22888](https://github.com/ClickHouse/ClickHouse/issues/22888): Fix approx total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22884](https://github.com/ClickHouse/ClickHouse/issues/22884): Fix pushdown of `HAVING` in case, when filter column is used in aggregation. [#22763](https://github.com/ClickHouse/ClickHouse/pull/22763) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#22917](https://github.com/ClickHouse/ClickHouse/issues/22917): LIVE VIEW (experimental feature). Fix possible hanging in concurrent DROP/CREATE of TEMPORARY LIVE VIEW in `TemporaryLiveViewCleaner`, see https://gist.github.com/vzakaznikov/0c03195960fc86b56bfe2bc73a90019e. [#22858](https://github.com/ClickHouse/ClickHouse/pull/22858) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#22921](https://github.com/ClickHouse/ClickHouse/issues/22921): Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#22957](https://github.com/ClickHouse/ClickHouse/issues/22957): Fix usage of constant columns of type `Map` with nullable values. [#22939](https://github.com/ClickHouse/ClickHouse/pull/22939) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v21.4.4.30-stable.md b/docs/changelogs/v21.4.4.30-stable.md new file mode 100644 index 00000000000..f029d334fc7 --- /dev/null +++ b/docs/changelogs/v21.4.4.30-stable.md @@ -0,0 +1,25 @@ +### ClickHouse release v21.4.4.30-stable FIXME as compared to v21.4.3.21-stable + +#### Backward Incompatible Change +* Backported in [#23145](https://github.com/ClickHouse/ClickHouse/issues/23145): Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). + +#### New Feature +* Backported in [#23147](https://github.com/ClickHouse/ClickHouse/issues/23147): Improved performance of `dictGetHierarchy`, `dictIsIn` functions. Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` returns all children as an array of indexes. It is an inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` returns all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity.
Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23146](https://github.com/ClickHouse/ClickHouse/issues/23146): Added function `dictGetOrNull`. It works like `dictGet`, but return `Null` in case key was not found in dictionary. Closes [#22375](https://github.com/ClickHouse/ClickHouse/issues/22375). [#22413](https://github.com/ClickHouse/ClickHouse/pull/22413) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Improvement +* Backported in [#23148](https://github.com/ClickHouse/ClickHouse/issues/23148): Add aliases `simpleJSONExtract/simpleJSONHas` to `visitParam/visitParamExtract{UInt, Int, Bool, Float, Raw, String}`. Fixes [#21383](https://github.com/ClickHouse/ClickHouse/issues/21383). [#21519](https://github.com/ClickHouse/ClickHouse/pull/21519) ([fastio](https://github.com/fastio)). +* Backported in [#23016](https://github.com/ClickHouse/ClickHouse/issues/23016): Set `background_fetches_pool_size` to 8 that is better for production usage with frequent small insertions or slow ZooKeeper cluster. [#22945](https://github.com/ClickHouse/ClickHouse/pull/22945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23079](https://github.com/ClickHouse/ClickHouse/issues/23079): Raised the threshold on max number of matches in result of the function `extractAllGroupsHorizontal`. [#23036](https://github.com/ClickHouse/ClickHouse/pull/23036) ([Vasily Nemkov](https://github.com/Enmk)). + +#### Bug Fix +* Backported in [#23158](https://github.com/ClickHouse/ClickHouse/issues/23158): Fixed a bug with unlimited wait for auxiliary AWS requests. [#22594](https://github.com/ClickHouse/ClickHouse/pull/22594) ([Vladimir Chebotarev](https://github.com/excitoon)). 
+* Backported in [#23018](https://github.com/ClickHouse/ClickHouse/issues/23018): fixed `formatDateTime()` on `DateTime64` and "%C" format specifier fixed `toDateTime64()` for large values and non-zero scale. ... [#22937](https://github.com/ClickHouse/ClickHouse/pull/22937) ([Vasily Nemkov](https://github.com/Enmk)). +* Backported in [#23031](https://github.com/ClickHouse/ClickHouse/issues/23031): Fix error `Cannot find column in ActionsDAG result` which may happen if subquery uses `untuple`. Fixes [#22290](https://github.com/ClickHouse/ClickHouse/issues/22290). [#22991](https://github.com/ClickHouse/ClickHouse/pull/22991) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23074](https://github.com/ClickHouse/ClickHouse/issues/23074): Remove non-essential details from suggestions in clickhouse-client. This closes [#22158](https://github.com/ClickHouse/ClickHouse/issues/22158). [#23040](https://github.com/ClickHouse/ClickHouse/pull/23040) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23097](https://github.com/ClickHouse/ClickHouse/issues/23097): Fixed `Table .inner_id... doesn't exist` error when selecting from Materialized View after detaching it from Atomic database and attaching back. [#23047](https://github.com/ClickHouse/ClickHouse/pull/23047) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23173](https://github.com/ClickHouse/ClickHouse/issues/23173): Some values were formatted with alignment in center in table cells in `Markdown` format. Not anymore. [#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#23149](https://github.com/ClickHouse/ClickHouse/issues/23149): Enable the bundled openldap on ppc64le ... [#22487](https://github.com/ClickHouse/ClickHouse/pull/22487) ([Kfir Itzhak](https://github.com/mastertheknife)). 
+ diff --git a/docs/changelogs/v21.4.5.46-stable.md b/docs/changelogs/v21.4.5.46-stable.md new file mode 100644 index 00000000000..664037ba596 --- /dev/null +++ b/docs/changelogs/v21.4.5.46-stable.md @@ -0,0 +1,21 @@ +### ClickHouse release v21.4.5.46-stable FIXME as compared to v21.4.4.30-stable + +#### Improvement +* Backported in [#23273](https://github.com/ClickHouse/ClickHouse/issues/23273): Disable settings `use_hedged_requests` and `async_socket_for_remote` because there is an evidence that it may cause issues. [#23261](https://github.com/ClickHouse/ClickHouse/pull/23261) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#23234](https://github.com/ClickHouse/ClickHouse/issues/23234): Server might fail to start if `data_type_default_nullable` setting is enabled in default profile, it's fixed. Fixes [#22573](https://github.com/ClickHouse/ClickHouse/issues/22573). [#23185](https://github.com/ClickHouse/ClickHouse/pull/23185) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23407](https://github.com/ClickHouse/ClickHouse/issues/23407): QueryAliasVisitor to prefer alias for ASTWithAlias if subquery was optimized to constant. Fixes [#22924](https://github.com/ClickHouse/ClickHouse/issues/22924). Fixes [#10401](https://github.com/ClickHouse/ClickHouse/issues/10401). [#23191](https://github.com/ClickHouse/ClickHouse/pull/23191) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23382](https://github.com/ClickHouse/ClickHouse/issues/23382): Fixed `Not found column` error when selecting from `MaterializeMySQL` with condition on key column. Fixes [#22432](https://github.com/ClickHouse/ClickHouse/issues/22432). [#23200](https://github.com/ClickHouse/ClickHouse/pull/23200) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#23385](https://github.com/ClickHouse/ClickHouse/issues/23385): Fixed the behavior when disabling `input_format_with_names_use_header` setting discards all the input with CSVWithNames format. This fixes [#22406](https://github.com/ClickHouse/ClickHouse/issues/22406). [#23202](https://github.com/ClickHouse/ClickHouse/pull/23202) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#23287](https://github.com/ClickHouse/ClickHouse/issues/23287): Fixed simple key dictionary from DDL creation if primary key is not first attribute. Fixes [#23236](https://github.com/ClickHouse/ClickHouse/issues/23236). [#23262](https://github.com/ClickHouse/ClickHouse/pull/23262) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23451](https://github.com/ClickHouse/ClickHouse/issues/23451): Fixed very rare (distributed) race condition between creation and removal of ReplicatedMergeTree tables. It might cause exceptions like `node doesn't exist` on attempt to create replicated table. Fixes [#21419](https://github.com/ClickHouse/ClickHouse/issues/21419). [#23294](https://github.com/ClickHouse/ClickHouse/pull/23294) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23426](https://github.com/ClickHouse/ClickHouse/issues/23426): Fix possible crash in case if `unknown packet` was received from remote query (with `async_socket_for_remote` enabled). Maybe fixes [#21167](https://github.com/ClickHouse/ClickHouse/issues/21167). [#23309](https://github.com/ClickHouse/ClickHouse/pull/23309) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23467](https://github.com/ClickHouse/ClickHouse/issues/23467): `ORDER BY` with `COLLATE` was not working correctly if the column is in primary key (or is a monotonic function of it) and the setting `optimize_read_in_order` is not turned off. This closes [#22379](https://github.com/ClickHouse/ClickHouse/issues/22379).
Workaround for older versions: turn the setting `optimize_read_in_order` off. [#23375](https://github.com/ClickHouse/ClickHouse/pull/23375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23509](https://github.com/ClickHouse/ClickHouse/issues/23509): Remove support for `argMin` and `argMax` for single `Tuple` argument. The code was not memory-safe. The feature was added by mistake and it is confusing for people. These functions can be reintroduced under different names later. This fixes [#22384](https://github.com/ClickHouse/ClickHouse/issues/22384) and reverts [#17359](https://github.com/ClickHouse/ClickHouse/issues/17359). [#23393](https://github.com/ClickHouse/ClickHouse/pull/23393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23478](https://github.com/ClickHouse/ClickHouse/issues/23478): Kafka storage may support `arrow` and `arrowstream` format messages. [#23415](https://github.com/ClickHouse/ClickHouse/pull/23415) ([Chao Ma](https://github.com/godliness)). +* Backported in [#23498](https://github.com/ClickHouse/ClickHouse/issues/23498): - Bug fix for `deltaSum` aggregate function in counter reset case ... [#23437](https://github.com/ClickHouse/ClickHouse/pull/23437) ([Russ Frank](https://github.com/rf)). +* Backported in [#23492](https://github.com/ClickHouse/ClickHouse/issues/23492): Fix bug that does not allow cast from empty array literal, to array with dimensions greater than 1. Closes [#14476](https://github.com/ClickHouse/ClickHouse/issues/14476). [#23456](https://github.com/ClickHouse/ClickHouse/pull/23456) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#23534](https://github.com/ClickHouse/ClickHouse/issues/23534): When modify column's default value without datatype, and this column is used as ReplacingMergeTree's parameter like column `b` in the below example, then the server will core dump: ``` CREATE TABLE alter_test (a Int32, b DateTime) ENGINE = ReplacingMergeTree(b) ORDER BY a; ALTER TABLE alter_test MODIFY COLUMN `b` DEFAULT now(); ``` the sever throw error: ``` 2021.04.22 09:48:00.685317 [ 2607 ] {} BaseDaemon: Received signal 11 2021.04.22 09:48:00.686110 [ 2705 ] {} BaseDaemon: ######################################## 2021.04.22 09:48:00.686336 [ 2705 ] {} BaseDaemon: (version 21.6.1.1, build id: 6459E84DFCF8E778546C5AD2FFE91B3AD71E1B1B) (from thread 2619) (no query) Received signal Segmentation fault (11) 2021.04.22 09:48:00.686572 [ 2705 ] {} BaseDaemon: Address: NULL pointer. Access: read. Address not mapped to object. 2021.04.22 09:48:00.686686 [ 2705 ] {} BaseDaemon: Stack trace: 0x1c2585d7 0x1c254f66 0x1bb7e403 0x1bb58923 0x1bb56a85 0x1c6840ef 0x1c691148 0x2061a05c 0x2061a8e4 0x20775a03 0x207722bd 0x20771048 0x7f6e5c25be25 0x7f6e5bd81bad 2021.04.22 09:48:02.283045 [ 2705 ] {} BaseDaemon: 4. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1449: DB::(anonymous namespace)::checkVersionColumnTypesConversion(DB::IDataType const*, DB::IDataType const*, std::__1::basic_string, std::__1::allocator >) @ 0x1c2585d7 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:03.714451 [ 2705 ] {} BaseDaemon: 5. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1582: DB::MergeTreeData::checkAlterIsPossible(DB::AlterCommands const&, std::__1::shared_ptr) const @ 0x1c254f66 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:04.692949 [ 2705 ] {} BaseDaemon: 6. 
/mnt/disk4/hewenting/ClickHouse/src/src/Interpreters/InterpreterAlterQuery.cpp:144: DB::InterpreterAlterQuery::execute() @ 0x1bb7e403 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server ```. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#23531](https://github.com/ClickHouse/ClickHouse/issues/23531): Fix `columns` function when multiple joins in select query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)). + diff --git a/docs/changelogs/v21.4.6.55-stable.md b/docs/changelogs/v21.4.6.55-stable.md new file mode 100644 index 00000000000..ea3e413ea0c --- /dev/null +++ b/docs/changelogs/v21.4.6.55-stable.md @@ -0,0 +1,18 @@ +### ClickHouse release v21.4.6.55-stable FIXME as compared to v21.4.5.46-stable + +#### Improvement +* Backported in [#23679](https://github.com/ClickHouse/ClickHouse/issues/23679): Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in implementation of earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Bug Fix +* Backported in [#23580](https://github.com/ClickHouse/ClickHouse/issues/23580): Fixed very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of deduplication window. [#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23676](https://github.com/ClickHouse/ClickHouse/issues/23676): Don't relax NOT conditions during partition pruning. 
This fixes [#23305](https://github.com/ClickHouse/ClickHouse/issues/23305) and [#21539](https://github.com/ClickHouse/ClickHouse/issues/21539). [#23310](https://github.com/ClickHouse/ClickHouse/pull/23310) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23583](https://github.com/ClickHouse/ClickHouse/issues/23583): Fix bug in dict join with join_algorithm = 'auto'. Close [#23002](https://github.com/ClickHouse/ClickHouse/issues/23002). [#23312](https://github.com/ClickHouse/ClickHouse/pull/23312) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#23641](https://github.com/ClickHouse/ClickHouse/issues/23641): Fix incompatible constant expression generation during partition pruning based on virtual columns. This fixes https://github.com/ClickHouse/ClickHouse/pull/21401#discussion_r611888913. [#23366](https://github.com/ClickHouse/ClickHouse/pull/23366) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23586](https://github.com/ClickHouse/ClickHouse/issues/23586): Allow to move more conditions to `PREWHERE` as it was before version 21.1. Insufficient number of moved conditions could lead to worse performance. [#23397](https://github.com/ClickHouse/ClickHouse/pull/23397) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#23588](https://github.com/ClickHouse/ClickHouse/issues/23588): Fixed `Cannot unlink file` error on unsuccessful creation of ReplicatedMergeTree table with multidisk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23591](https://github.com/ClickHouse/ClickHouse/issues/23591): Fix corner cases in vertical merges with `ReplacingMergeTree`. In rare cases they could lead to fails of merges with exceptions like `Incomplete granules are not allowed while blocks are granules size`.
[#23459](https://github.com/ClickHouse/ClickHouse/pull/23459) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#23611](https://github.com/ClickHouse/ClickHouse/issues/23611): Fix restart / stop command hanging. Closes [#20214](https://github.com/ClickHouse/ClickHouse/issues/20214). [#23552](https://github.com/ClickHouse/ClickHouse/pull/23552) ([filimonov](https://github.com/filimonov)). +* Backported in [#23693](https://github.com/ClickHouse/ClickHouse/issues/23693): Fixed server fault when inserting data through HTTP caused an exception. This fixes [#23512](https://github.com/ClickHouse/ClickHouse/issues/23512). [#23643](https://github.com/ClickHouse/ClickHouse/pull/23643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#23695](https://github.com/ClickHouse/ClickHouse/issues/23695): Added an exception in case of completely the same values in both samples in aggregate function `mannWhitneyUTest`. This fixes [#23646](https://github.com/ClickHouse/ClickHouse/issues/23646). [#23654](https://github.com/ClickHouse/ClickHouse/pull/23654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#23774](https://github.com/ClickHouse/ClickHouse/issues/23774): Avoid possible "Cannot schedule a task" error (in case some exception had been occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.4.7.3-stable.md b/docs/changelogs/v21.4.7.3-stable.md new file mode 100644 index 00000000000..0dad6cfcb2b --- /dev/null +++ b/docs/changelogs/v21.4.7.3-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.4.7.3-stable FIXME as compared to v21.4.6.55-stable + +#### Bug Fix +* Backported in [#23971](https://github.com/ClickHouse/ClickHouse/issues/23971): Fixed a bug in recovery of staled `ReplicatedMergeTree` replica. 
Some metadata updates could be ignored by staled replica if `ALTER` query was executed during downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23819](https://github.com/ClickHouse/ClickHouse/issues/23819): Fix crash when `PREWHERE` and row policy filter are both in effect with empty result. [#23763](https://github.com/ClickHouse/ClickHouse/pull/23763) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23816](https://github.com/ClickHouse/ClickHouse/issues/23816): Fix `CLEAR COLUMN` does not work when it is referenced by materialized view. Close [#23764](https://github.com/ClickHouse/ClickHouse/issues/23764). [#23781](https://github.com/ClickHouse/ClickHouse/pull/23781) ([flynn](https://github.com/ucasfl)). +* Backported in [#23831](https://github.com/ClickHouse/ClickHouse/issues/23831): Fix error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` and `use_hedged_requests`. Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23930](https://github.com/ClickHouse/ClickHouse/issues/23930): HashedDictionary complex key update field initial load fix. Closes [#23800](https://github.com/ClickHouse/ClickHouse/issues/23800). [#23824](https://github.com/ClickHouse/ClickHouse/pull/23824) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23957](https://github.com/ClickHouse/ClickHouse/issues/23957): Fix keys metrics accounting for CACHE() dictionary with duplicates in the source (leads to `DictCacheKeysRequestedMiss` overflows). [#23929](https://github.com/ClickHouse/ClickHouse/pull/23929) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#24003](https://github.com/ClickHouse/ClickHouse/issues/24003): Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). [#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#24188](https://github.com/ClickHouse/ClickHouse/issues/24188): Some `ALTER PARTITION` queries might cause `Part A intersects previous part B` and `Unexpected merged part C intersecting drop range D` errors in replication queue. It's fixed. Fixes [#23296](https://github.com/ClickHouse/ClickHouse/issues/23296). [#23997](https://github.com/ClickHouse/ClickHouse/pull/23997) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24031](https://github.com/ClickHouse/ClickHouse/issues/24031): Fix crash in MergeJoin, close [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). [#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#24141](https://github.com/ClickHouse/ClickHouse/issues/24141): Bug: explain pipeline with` select xxx final `shows wrong pipeline: ``` dell123 :) explain pipeline select z from prewhere_move_select_final final;. [#24116](https://github.com/ClickHouse/ClickHouse/pull/24116) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#24171](https://github.com/ClickHouse/ClickHouse/issues/24171): Fix a rare bug that could lead to a partially initialized table that can serve write requests (insert/alter/so on). Now such tables will be in readonly mode. [#24122](https://github.com/ClickHouse/ClickHouse/pull/24122) ([alesapin](https://github.com/alesapin)). +* Backported in [#24215](https://github.com/ClickHouse/ClickHouse/issues/24215): Fix race condition which could happen in RBAC under a heavy load. 
This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090), [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134). [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#24243](https://github.com/ClickHouse/ClickHouse/issues/24243): Fix abnormal server termination due to hdfs becoming not accessible during query execution. Closes [#24117](https://github.com/ClickHouse/ClickHouse/issues/24117). [#24191](https://github.com/ClickHouse/ClickHouse/pull/24191) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v21.5.1.6601-prestable.md b/docs/changelogs/v21.5.1.6601-prestable.md new file mode 100644 index 00000000000..2d09ce6c20b --- /dev/null +++ b/docs/changelogs/v21.5.1.6601-prestable.md @@ -0,0 +1,109 @@ +### ClickHouse release v21.5.1.6601-prestable FIXME as compared to v21.4.1.6422-prestable + +#### Backward Incompatible Change +* Change comparison of integers and floating point numbers when integer is not exactly representable in the floating point data type. In new version comparison will return false as the rounding error will occur. Example: `9223372036854775808.0 != 9223372036854775808`, because the number `9223372036854775808` is not representable as floating point number exactly (and `9223372036854775808.0` is rounded to `9223372036854776000.0`). But in previous version the comparison will return as the numbers are equal, because if the floating point number `9223372036854776000.0` get converted back to UInt64, it will yield `9223372036854775808`. For the reference, the Python programming language also treats these numbers as equal. But this behaviour was dependent on CPU model (different results on AMD64 and AArch64 for some out-of-range numbers), so we make the comparison more precise. It will treat int and float numbers equal only if int is represented in floating point type exactly.
[#22595](https://github.com/ClickHouse/ClickHouse/pull/22595) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Implement function `arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum)` that applies the expression to each element of the array (or set of parallel arrays) and collect result in accumulator. [#21589](https://github.com/ClickHouse/ClickHouse/pull/21589) ([Dmitry Krylov](https://github.com/dmalkr)). +* - Support Apple m1. [#21639](https://github.com/ClickHouse/ClickHouse/pull/21639) ([changvvb](https://github.com/changvvb)). +* Add a setting `max_distributed_depth` that limits the depth of recursive queries to `Distributed` tables. Closes [#20229](https://github.com/ClickHouse/ClickHouse/issues/20229). [#21942](https://github.com/ClickHouse/ClickHouse/pull/21942) ([flynn](https://github.com/ucasfl)). +* Table function, which allows to process files from `s3` in parallel from many nodes in a specified cluster. [#22012](https://github.com/ClickHouse/ClickHouse/pull/22012) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support for replicas in MySQL/PostgreSQL table engine / table function. Added wrapper storage over MySQL / PostgreSQL storages to allow shards. Closes [#20969](https://github.com/ClickHouse/ClickHouse/issues/20969). [#22217](https://github.com/ClickHouse/ClickHouse/pull/22217) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update paths to the catboost model configs in config reloading. [#22434](https://github.com/ClickHouse/ClickHouse/pull/22434) ([Kruglov Pavel](https://github.com/Avogar)). +* Add new setting `non_replicated_deduplication_window` for non-replicated MergeTree inserts deduplication. [#22514](https://github.com/ClickHouse/ClickHouse/pull/22514) ([alesapin](https://github.com/alesapin)). +* FlatDictionary added `initial_array_size`, `max_array_size` options. 
[#22521](https://github.com/ClickHouse/ClickHouse/pull/22521) ([Maksim Kita](https://github.com/kitaisreal)). +* Added `ALTER TABLE ... FETCH PART ...` query. It's similar to `FETCH PARTITION`, but fetches only one part. [#22706](https://github.com/ClickHouse/ClickHouse/pull/22706) ([jasong](https://github.com/songenjie)). +* Added `Decimal256` type support in dictionaries. Closes [#20979](https://github.com/ClickHouse/ClickHouse/issues/20979). [#22960](https://github.com/ClickHouse/ClickHouse/pull/22960) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Add function alignment for possibly better performance. [#21431](https://github.com/ClickHouse/ClickHouse/pull/21431) ([Daniel Kutenin](https://github.com/danlark1)). +* Exclude values that do not belong to the shard from right part of IN section for distributed queries (under `optimize_skip_unused_shards_rewrite_in`, enabled by default, since it still requires `optimize_skip_unused_shards`). [#21511](https://github.com/ClickHouse/ClickHouse/pull/21511) ([Azat Khuzhin](https://github.com/azat)). +* Disable compression by default when interacting with localhost (with clickhouse-client or server to server with distributed queries) via native protocol. It may improve performance of some import/export operations. This closes [#22234](https://github.com/ClickHouse/ClickHouse/issues/22234). [#22237](https://github.com/ClickHouse/ClickHouse/pull/22237) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of reading from `ArrowStream` input format for sources other than local file (e.g. URL). [#22673](https://github.com/ClickHouse/ClickHouse/pull/22673) ([nvartolomei](https://github.com/nvartolomei)). +* Improve performance of `intDiv` by dynamic dispatch for AVX2. This closes [#22314](https://github.com/ClickHouse/ClickHouse/issues/22314). [#23000](https://github.com/ClickHouse/ClickHouse/pull/23000) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+ +#### Improvement +* Support dynamic interserver credentials. [#14113](https://github.com/ClickHouse/ClickHouse/pull/14113) ([johnskopis](https://github.com/johnskopis)). +* Add clickhouse-library-bridge for library dictionary source. Closes [#9502](https://github.com/ClickHouse/ClickHouse/issues/9502). [#21509](https://github.com/ClickHouse/ClickHouse/pull/21509) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow publishing Kafka errors to a virtual column of Kafka engine, controlled by the `kafka_handle_error_mode` setting. [#21850](https://github.com/ClickHouse/ClickHouse/pull/21850) ([fastio](https://github.com/fastio)). +* Use nanodbc instead of Poco::ODBC. Closes [#9678](https://github.com/ClickHouse/ClickHouse/issues/9678). Add support for DateTime64 and Decimal* for ODBC table engine. Closes [#21961](https://github.com/ClickHouse/ClickHouse/issues/21961). Fixed issue with cyrillic text being truncated. Closes [#16246](https://github.com/ClickHouse/ClickHouse/issues/16246). Added connection pools for odbc bridge. [#21972](https://github.com/ClickHouse/ClickHouse/pull/21972) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Speeded up reading subset of columns from File-like table engine with internal file written in column oriented data formats (Parquet, Arrow and ORC) This closes [#20129](https://github.com/ClickHouse/ClickHouse/issues/20129) Done by @keen-wolf. [#22299](https://github.com/ClickHouse/ClickHouse/pull/22299) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Correctly check structure of async distributed blocks. [#22325](https://github.com/ClickHouse/ClickHouse/pull/22325) ([Azat Khuzhin](https://github.com/azat)). +* Make `round` function to behave consistently on non-x86_64 platforms. Rounding half to nearest even (Banker's rounding) is used. [#22582](https://github.com/ClickHouse/ClickHouse/pull/22582) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Clear the rest of the screen and show cursor in `clickhouse-client` if previous program has left garbage in terminal. This closes [#16518](https://github.com/ClickHouse/ClickHouse/issues/16518). [#22634](https://github.com/ClickHouse/ClickHouse/pull/22634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to use CTE in VIEW definition. This closes [#22491](https://github.com/ClickHouse/ClickHouse/issues/22491). [#22657](https://github.com/ClickHouse/ClickHouse/pull/22657) ([Amos Bird](https://github.com/amosbird)). +* Add metric to track how much time is spent waiting for Buffer layer lock. [#22725](https://github.com/ClickHouse/ClickHouse/pull/22725) ([Azat Khuzhin](https://github.com/azat)). +* Allow RBAC row policy via postgresql protocol. Closes [#22658](https://github.com/ClickHouse/ClickHouse/issues/22658). PostgreSQL protocol is enabled in configuration by default. [#22755](https://github.com/ClickHouse/ClickHouse/pull/22755) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MaterializeMySQL (experimental feature). Make ClickHouse able to replicate MySQL databases containing views without failing. This is accomplished by ignoring the views. ... [#22760](https://github.com/ClickHouse/ClickHouse/pull/22760) ([Christian Frøystad](https://github.com/cfroystad)). +* `dateDiff` now works with `DateTime64` arguments (even for values outside of `DateTime` range) ... [#22931](https://github.com/ClickHouse/ClickHouse/pull/22931) ([Vasily Nemkov](https://github.com/Enmk)). +* Set `background_fetches_pool_size` to 8 that is better for production usage with frequent small insertions or slow ZooKeeper cluster. [#22945](https://github.com/ClickHouse/ClickHouse/pull/22945) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix inactive_parts_to_throw_insert=0 with inactive_parts_to_delay_insert>0. [#22947](https://github.com/ClickHouse/ClickHouse/pull/22947) ([Azat Khuzhin](https://github.com/azat)). 
+* Respect max_part_removal_threads for ReplicatedMergeTree. [#22971](https://github.com/ClickHouse/ClickHouse/pull/22971) ([Azat Khuzhin](https://github.com/azat)). +* Fix an error handling in Poco HTTP Client for AWS. [#22973](https://github.com/ClickHouse/ClickHouse/pull/22973) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). +* When selecting from MergeTree table with NULL in WHERE condition, in rare cases, exception was thrown. This closes [#20019](https://github.com/ClickHouse/ClickHouse/issues/20019). [#22978](https://github.com/ClickHouse/ClickHouse/pull/22978) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to flush buffer only in background for StorageBuffer. [#22986](https://github.com/ClickHouse/ClickHouse/pull/22986) ([Azat Khuzhin](https://github.com/azat)). +* Add ability to run clickhouse-keeper with SSL. Config settings `keeper_server.tcp_port_secure` can be used for secure interaction between client and keeper-server. `keeper_server.raft_configuration.secure` can be used to enable internal secure communication between nodes. [#22992](https://github.com/ClickHouse/ClickHouse/pull/22992) ([alesapin](https://github.com/alesapin)). +* Increase `max_uri_size` (the maximum size of URL in HTTP interface) to 1 MiB by default. This closes [#21197](https://github.com/ClickHouse/ClickHouse/issues/21197). [#22997](https://github.com/ClickHouse/ClickHouse/pull/22997) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not perform optimize_skip_unused_shards for cluster with one node. [#22999](https://github.com/ClickHouse/ClickHouse/pull/22999) ([Azat Khuzhin](https://github.com/azat)). +* Raised the threshold on max number of matches in result of the function `extractAllGroupsHorizontal`. [#23036](https://github.com/ClickHouse/ClickHouse/pull/23036) ([Vasily Nemkov](https://github.com/Enmk)). 
+* Implement functions `arrayHasAny`, `arrayHasAll`, `has`, `indexOf`, `countEqual` for generic case when types of array elements are different. In previous versions the functions `arrayHasAny`, `arrayHasAll` returned false and `has`, `indexOf`, `countEqual` threw an exception. Also add support for `Decimal` and big integer types in functions `has` and similar. This closes [#20272](https://github.com/ClickHouse/ClickHouse/issues/20272). [#23044](https://github.com/ClickHouse/ClickHouse/pull/23044) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix memory tracking with min_bytes_to_use_mmap_io. [#23211](https://github.com/ClickHouse/ClickHouse/pull/23211) ([Azat Khuzhin](https://github.com/azat)). +* Make function `unhex` case insensitive for compatibility with MySQL. [#23229](https://github.com/ClickHouse/ClickHouse/pull/23229) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Fix very rare bug when quorum insert with `quorum_parallel=1` is not really "quorum" because of deduplication. [#18215](https://github.com/ClickHouse/ClickHouse/pull/18215) ([filimonov](https://github.com/filimonov)). +* Fix "unknown column" error for tables with `Merge` engine in queries with `JOIN` and aggregation. Closes [#18368](https://github.com/ClickHouse/ClickHouse/issues/18368), close [#22226](https://github.com/ClickHouse/ClickHouse/issues/22226). [#21370](https://github.com/ClickHouse/ClickHouse/pull/21370) ([Vladimir C](https://github.com/vdimir)). +* Check if table function view is used as a column. This complements https://github.com/ClickHouse/ClickHouse/pull/20350. [#21465](https://github.com/ClickHouse/ClickHouse/pull/21465) ([Amos Bird](https://github.com/amosbird)). +* Fix bug, which leads to underaggregation of data in case of enabled `optimize_aggregation_in_order` and many parts in table. Slightly improve performance of aggregation with enabled `optimize_aggregation_in_order`. 
[#21889](https://github.com/ClickHouse/ClickHouse/pull/21889) ([Anton Popov](https://github.com/CurtizJ)). +* Do not limit HTTP chunk size. Fixes [#21907](https://github.com/ClickHouse/ClickHouse/issues/21907). [#22322](https://github.com/ClickHouse/ClickHouse/pull/22322) ([Ivan](https://github.com/abyss7)). +* Buffer overflow (on read) was possible in `tokenbf_v1` full text index. The excessive bytes are not used but the read operation may lead to crash in rare cases. This closes [#19233](https://github.com/ClickHouse/ClickHouse/issues/19233). [#22421](https://github.com/ClickHouse/ClickHouse/pull/22421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ClickHouseDictionarySource configuration loop. Closes [#14314](https://github.com/ClickHouse/ClickHouse/issues/14314). [#22479](https://github.com/ClickHouse/ClickHouse/pull/22479) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix bug in partial merge join with `LowCardinality`. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir C](https://github.com/vdimir)). +* Follow-up fix for [#21936](https://github.com/ClickHouse/ClickHouse/issues/21936). Also fixes [#22433](https://github.com/ClickHouse/ClickHouse/issues/22433). [#22518](https://github.com/ClickHouse/ClickHouse/pull/22518) ([Ivan](https://github.com/abyss7)). +* Fix deserialization of empty string without newline at end of TSV format. This closes [#20244](https://github.com/ClickHouse/ClickHouse/issues/20244). Possible workaround without version update: set `input_format_null_as_default` to zero. It was zero in old versions. [#22527](https://github.com/ClickHouse/ClickHouse/pull/22527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UB by unlocking the rwlock of the TinyLog from the same thread. 
[#22560](https://github.com/ClickHouse/ClickHouse/pull/22560) ([Azat Khuzhin](https://github.com/azat)). +* Avoid UB in *Log engines for rwlock unlock due to unlock from another thread. [#22583](https://github.com/ClickHouse/ClickHouse/pull/22583) ([Azat Khuzhin](https://github.com/azat)). +* Fix usage of function `map` in distributed queries. [#22588](https://github.com/ClickHouse/ClickHouse/pull/22588) ([foolchi](https://github.com/foolchi)). +* Try flush write buffer only if it is initialized. Fixes segfault when client closes connection very early [#22579](https://github.com/ClickHouse/ClickHouse/issues/22579). [#22591](https://github.com/ClickHouse/ClickHouse/pull/22591) ([nvartolomei](https://github.com/nvartolomei)). +* Fixed a bug with unlimited wait for auxiliary AWS requests. [#22594](https://github.com/ClickHouse/ClickHouse/pull/22594) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix LOGICAL_ERROR for Log with nested types w/o columns in the SELECT clause. [#22654](https://github.com/ClickHouse/ClickHouse/pull/22654) ([Azat Khuzhin](https://github.com/azat)). +* Fix wait for mutations on several replicas for ReplicatedMergeTree table engines. Previously, mutation/alter query may finish before mutation actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)). +* Fix possible hangs in zk requests in case of OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix approx total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)). 
+* * Revert "Move conditions from JOIN ON to WHERE" (ClickHouse/ClickHouse[#19685](https://github.com/ClickHouse/ClickHouse/issues/19685)), close [#22399](https://github.com/ClickHouse/ClickHouse/issues/22399), close [#21671](https://github.com/ClickHouse/ClickHouse/issues/21671). [#22753](https://github.com/ClickHouse/ClickHouse/pull/22753) ([Vladimir C](https://github.com/vdimir)). +* Fix pushdown of `HAVING` in case, when filter column is used in aggregation. [#22763](https://github.com/ClickHouse/ClickHouse/pull/22763) ([Anton Popov](https://github.com/CurtizJ)). +* LIVE VIEW (experimental feature). Fix possible hanging in concurrent DROP/CREATE of TEMPORARY LIVE VIEW in `TemporaryLiveViewCleaner`, see https://gist.github.com/vzakaznikov/0c03195960fc86b56bfe2bc73a90019e. [#22858](https://github.com/ClickHouse/ClickHouse/pull/22858) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bytes_allocated for sparse_hashed dictionaries. [#22867](https://github.com/ClickHouse/ClickHouse/pull/22867) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a crash when using `mannWhitneyUTest` and `rankCorr` with window functions. This fixes [#22728](https://github.com/ClickHouse/ClickHouse/issues/22728). [#22876](https://github.com/ClickHouse/ClickHouse/pull/22876) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* fixed `formatDateTime()` on `DateTime64` and "%C" format specifier fixed `toDateTime64()` for large values and non-zero scale. ... [#22937](https://github.com/ClickHouse/ClickHouse/pull/22937) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix usage of constant columns of type `Map` with nullable values. [#22939](https://github.com/ClickHouse/ClickHouse/pull/22939) ([Anton Popov](https://github.com/CurtizJ)). +* Simplify debian packages. This fixes [#21698](https://github.com/ClickHouse/ClickHouse/issues/21698). [#22976](https://github.com/ClickHouse/ClickHouse/pull/22976) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix error `Cannot find column in ActionsDAG result` which may happen if subquery uses `untuple`. Fixes [#22290](https://github.com/ClickHouse/ClickHouse/issues/22290). [#22991](https://github.com/ClickHouse/ClickHouse/pull/22991) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove non-essential details from suggestions in clickhouse-client. This closes [#22158](https://github.com/ClickHouse/ClickHouse/issues/22158). [#23040](https://github.com/ClickHouse/ClickHouse/pull/23040) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `Table .inner_id... doesn't exist` error when selecting from Materialized View after detaching it from Atomic database and attaching back. [#23047](https://github.com/ClickHouse/ClickHouse/pull/23047) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Some values were formatted with alignment in center in table cells in `Markdown` format. Not anymore. [#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Server might fail to start if `data_type_default_nullable` setting is enabled in default profile, it's fixed. Fixes [#22573](https://github.com/ClickHouse/ClickHouse/issues/22573). [#23185](https://github.com/ClickHouse/ClickHouse/pull/23185) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix missing whitespace in some exception messages about `LowCardinality` type. [#23207](https://github.com/ClickHouse/ClickHouse/pull/23207) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed missing semicolon in exception message. The user may find this exception message unpleasant to read. [#23208](https://github.com/ClickHouse/ClickHouse/pull/23208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix reading from ODBC when there are many long column names in a table. Closes [#8853](https://github.com/ClickHouse/ClickHouse/issues/8853). 
[#23215](https://github.com/ClickHouse/ClickHouse/pull/23215) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Add on-demand check for clickhouse Keeper. [#22373](https://github.com/ClickHouse/ClickHouse/pull/22373) ([alesapin](https://github.com/alesapin)). +* Disable incompatible libraries (platform specific typically) on ppc64le ... [#22475](https://github.com/ClickHouse/ClickHouse/pull/22475) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Allow building with unbundled xz (lzma) using USE_INTERNAL_XZ_LIBRARY=OFF ... [#22571](https://github.com/ClickHouse/ClickHouse/pull/22571) ([Kfir Itzhak](https://github.com/mastertheknife)). +* Allow query profiling only on x86_64. See [#15174](https://github.com/ClickHouse/ClickHouse/issues/15174)#issuecomment-812954965 and [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638)#issuecomment-703805337. This closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#22580](https://github.com/ClickHouse/ClickHouse/pull/22580) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adjust some tests to output identical results on amd64 and aarch64 (qemu). The result was depending on implementation specific CPU behaviour. [#22590](https://github.com/ClickHouse/ClickHouse/pull/22590) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix some tests on AArch64 platform. [#22596](https://github.com/ClickHouse/ClickHouse/pull/22596) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ClickHouse Keeper build for MacOS. [#22860](https://github.com/ClickHouse/ClickHouse/pull/22860) ([alesapin](https://github.com/alesapin)). + +#### Other +* Fix some points from this comment https://github.com/ClickHouse/ClickHouse/pull/19516#issuecomment-782047840. [#22323](https://github.com/ClickHouse/ClickHouse/pull/22323) ([Ivan](https://github.com/abyss7)). 
+ +#### Build/Packaging/Testing Improvement + +* Build `jemalloc` with support for [heap profiling](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Heap-Profiling). [#22834](https://github.com/ClickHouse/ClickHouse/pull/22834) ([nvartolomei](https://github.com/nvartolomei)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Error message reads better'. [#22983](https://github.com/ClickHouse/ClickHouse/pull/22983) ([Igor O'sten](https://github.com/borodark)). + diff --git a/docs/changelogs/v21.5.2.25-prestable.md b/docs/changelogs/v21.5.2.25-prestable.md new file mode 100644 index 00000000000..45e784218da --- /dev/null +++ b/docs/changelogs/v21.5.2.25-prestable.md @@ -0,0 +1,40 @@ +### ClickHouse release v21.5.2.25-prestable FIXME as compared to v21.5.1.6601-prestable + +#### Improvement +* Backported in [#23342](https://github.com/ClickHouse/ClickHouse/issues/23342): Disable settings `use_hedged_requests` and `async_socket_for_remote` because there is an evidence that it may cause issues. [#23261](https://github.com/ClickHouse/ClickHouse/pull/23261) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23678](https://github.com/ClickHouse/ClickHouse/issues/23678): Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in implementation of earlier version of the algorithm was fixed. [#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Bug Fix +* Backported in [#23343](https://github.com/ClickHouse/ClickHouse/issues/23343): This PR fixes a crash on shutdown which happened because of currentConnections() could return zero while some connections were still alive. [#23154](https://github.com/ClickHouse/ClickHouse/pull/23154) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#23405](https://github.com/ClickHouse/ClickHouse/issues/23405): QueryAliasVisitor to prefer alias for ASTWithAlias if subquery was optimized to constant. Fixes [#22924](https://github.com/ClickHouse/ClickHouse/issues/22924). Fixes [#10401](https://github.com/ClickHouse/ClickHouse/issues/10401). [#23191](https://github.com/ClickHouse/ClickHouse/pull/23191) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23383](https://github.com/ClickHouse/ClickHouse/issues/23383): Fixed `Not found column` error when selecting from `MaterializeMySQL` with condition on key column. Fixes [#22432](https://github.com/ClickHouse/ClickHouse/issues/22432). [#23200](https://github.com/ClickHouse/ClickHouse/pull/23200) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23386](https://github.com/ClickHouse/ClickHouse/issues/23386): Fixed the behavior when disabling `input_format_with_names_use_header ` setting discards all the input with CSVWithNames format. This fixes [#22406](https://github.com/ClickHouse/ClickHouse/issues/22406). [#23202](https://github.com/ClickHouse/ClickHouse/pull/23202) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#23340](https://github.com/ClickHouse/ClickHouse/issues/23340): Add type conversion for optimize_skip_unused_shards_rewrite_in (fixes `use-of-uninitialized-value` with `optimize_skip_unused_shards_rewrite_in`). [#23219](https://github.com/ClickHouse/ClickHouse/pull/23219) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#23341](https://github.com/ClickHouse/ClickHouse/issues/23341): Fixed simple key dictionary from DDL creation if primary key is not first attribute. Fixes [#23236](https://github.com/ClickHouse/ClickHouse/issues/23236). [#23262](https://github.com/ClickHouse/ClickHouse/pull/23262) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#23449](https://github.com/ClickHouse/ClickHouse/issues/23449): Fixed very rare (distributed) race condition between creation and removal of ReplicatedMergeTree tables. It might cause exceptions like `node doesn't exist` on attempt to create replicated table. Fixes [#21419](https://github.com/ClickHouse/ClickHouse/issues/21419). [#23294](https://github.com/ClickHouse/ClickHouse/pull/23294) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23578](https://github.com/ClickHouse/ClickHouse/issues/23578): Fixed very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of deduplication window. [#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23427](https://github.com/ClickHouse/ClickHouse/issues/23427): Fix possible crash in case if `unknown packet` was received from remote query (with `async_socket_for_remote` enabled). Maybe fixes [#21167](https://github.com/ClickHouse/ClickHouse/issues/21167). [#23309](https://github.com/ClickHouse/ClickHouse/pull/23309) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23675](https://github.com/ClickHouse/ClickHouse/issues/23675): Don't relax NOT conditions during partition pruning. This fixes [#23305](https://github.com/ClickHouse/ClickHouse/issues/23305) and [#21539](https://github.com/ClickHouse/ClickHouse/issues/21539). [#23310](https://github.com/ClickHouse/ClickHouse/pull/23310) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23581](https://github.com/ClickHouse/ClickHouse/issues/23581): Fix bug in dict join with join_algorithm = 'auto'. Close [#23002](https://github.com/ClickHouse/ClickHouse/issues/23002). [#23312](https://github.com/ClickHouse/ClickHouse/pull/23312) ([Vladimir C](https://github.com/vdimir)). 
+* Backported in [#23428](https://github.com/ClickHouse/ClickHouse/issues/23428): Fix possible `Block structure mismatch` error for queries with `UNION` which could possibly happen after filter-push-down optimization. Fixes [#23029](https://github.com/ClickHouse/ClickHouse/issues/23029). [#23359](https://github.com/ClickHouse/ClickHouse/pull/23359) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23642](https://github.com/ClickHouse/ClickHouse/issues/23642): Fix incompatible constant expression generation during partition pruning based on virtual columns. This fixes https://github.com/ClickHouse/ClickHouse/pull/21401#discussion_r611888913. [#23366](https://github.com/ClickHouse/ClickHouse/pull/23366) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23453](https://github.com/ClickHouse/ClickHouse/issues/23453): `ORDER BY` with `COLLATE` was not working correctly if the column is in primary key (or is a monotonic function of it) and the setting `optimize_read_in_order` is not turned off. This closes [#22379](https://github.com/ClickHouse/ClickHouse/issues/22379). Workaround for older versions: turn the setting `optimize_read_in_order` off. [#23375](https://github.com/ClickHouse/ClickHouse/pull/23375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23510](https://github.com/ClickHouse/ClickHouse/issues/23510): Remove support for `argMin` and `argMax` for single `Tuple` argument. The code was not memory-safe. The feature was added by mistake and it is confusing for people. These functions can be reintroduced under different names later. This fixes [#22384](https://github.com/ClickHouse/ClickHouse/issues/22384) and reverts [#17359](https://github.com/ClickHouse/ClickHouse/issues/17359). [#23393](https://github.com/ClickHouse/ClickHouse/pull/23393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#23584](https://github.com/ClickHouse/ClickHouse/issues/23584): Allow to move more conditions to `PREWHERE` as it was before version 21.1. Insufficient number of moved conditions could lead to worse performance. [#23397](https://github.com/ClickHouse/ClickHouse/pull/23397) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#23477](https://github.com/ClickHouse/ClickHouse/issues/23477): Kafka storage may support `arrow` and `arrowstream` format messages. [#23415](https://github.com/ClickHouse/ClickHouse/pull/23415) ([Chao Ma](https://github.com/godliness)). +* Backported in [#23587](https://github.com/ClickHouse/ClickHouse/issues/23587): Fixed `Cannot unlink file` error on unsuccessful creation of ReplicatedMergeTree table with multidisk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23497](https://github.com/ClickHouse/ClickHouse/issues/23497): Bug fix for `deltaSum` aggregate function in counter reset case ... [#23437](https://github.com/ClickHouse/ClickHouse/pull/23437) ([Russ Frank](https://github.com/rf)). +* Backported in [#23491](https://github.com/ClickHouse/ClickHouse/issues/23491): Fix bug that does not allow cast from empty array literal, to array with dimensions greater than 1. Closes [#14476](https://github.com/ClickHouse/ClickHouse/issues/14476). [#23456](https://github.com/ClickHouse/ClickHouse/pull/23456) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23589](https://github.com/ClickHouse/ClickHouse/issues/23589): Fix corner cases in vertical merges with `ReplacingMergeTree`. In rare cases they could lead to fails of merges with exceptions like `Incomplete granules are not allowed while blocks are granules size`. [#23459](https://github.com/ClickHouse/ClickHouse/pull/23459) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#23535](https://github.com/ClickHouse/ClickHouse/issues/23535): When modify column's default value without datatype, and this column is used as ReplacingMergeTree's parameter like column `b` in the below example, then the server will core dump: ``` CREATE TABLE alter_test (a Int32, b DateTime) ENGINE = ReplacingMergeTree(b) ORDER BY a; ALTER TABLE alter_test MODIFY COLUMN `b` DEFAULT now(); ``` the sever throw error: ``` 2021.04.22 09:48:00.685317 [ 2607 ] {} BaseDaemon: Received signal 11 2021.04.22 09:48:00.686110 [ 2705 ] {} BaseDaemon: ######################################## 2021.04.22 09:48:00.686336 [ 2705 ] {} BaseDaemon: (version 21.6.1.1, build id: 6459E84DFCF8E778546C5AD2FFE91B3AD71E1B1B) (from thread 2619) (no query) Received signal Segmentation fault (11) 2021.04.22 09:48:00.686572 [ 2705 ] {} BaseDaemon: Address: NULL pointer. Access: read. Address not mapped to object. 2021.04.22 09:48:00.686686 [ 2705 ] {} BaseDaemon: Stack trace: 0x1c2585d7 0x1c254f66 0x1bb7e403 0x1bb58923 0x1bb56a85 0x1c6840ef 0x1c691148 0x2061a05c 0x2061a8e4 0x20775a03 0x207722bd 0x20771048 0x7f6e5c25be25 0x7f6e5bd81bad 2021.04.22 09:48:02.283045 [ 2705 ] {} BaseDaemon: 4. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1449: DB::(anonymous namespace)::checkVersionColumnTypesConversion(DB::IDataType const*, DB::IDataType const*, std::__1::basic_string, std::__1::allocator >) @ 0x1c2585d7 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:03.714451 [ 2705 ] {} BaseDaemon: 5. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1582: DB::MergeTreeData::checkAlterIsPossible(DB::AlterCommands const&, std::__1::shared_ptr) const @ 0x1c254f66 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:04.692949 [ 2705 ] {} BaseDaemon: 6. 
/mnt/disk4/hewenting/ClickHouse/src/src/Interpreters/InterpreterAlterQuery.cpp:144: DB::InterpreterAlterQuery::execute() @ 0x1bb7e403 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server ```. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#23532](https://github.com/ClickHouse/ClickHouse/issues/23532): Fix `columns` function when multiple joins in select query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23613](https://github.com/ClickHouse/ClickHouse/issues/23613): Fix restart / stop command hanging. Closes [#20214](https://github.com/ClickHouse/ClickHouse/issues/20214). [#23552](https://github.com/ClickHouse/ClickHouse/pull/23552) ([filimonov](https://github.com/filimonov)). +* Backported in [#23627](https://github.com/ClickHouse/ClickHouse/issues/23627): Fix misinterpretation of some `LIKE` expressions with escape sequences. [#23610](https://github.com/ClickHouse/ClickHouse/pull/23610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#23692](https://github.com/ClickHouse/ClickHouse/issues/23692): Fixed server fault when inserting data through HTTP caused an exception. This fixes [#23512](https://github.com/ClickHouse/ClickHouse/issues/23512). [#23643](https://github.com/ClickHouse/ClickHouse/pull/23643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#23694](https://github.com/ClickHouse/ClickHouse/issues/23694): Added an exception in case of completely the same values in both samples in aggregate function `mannWhitneyUTest`. This fixes [#23646](https://github.com/ClickHouse/ClickHouse/issues/23646). [#23654](https://github.com/ClickHouse/ClickHouse/pull/23654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Backported in [#23757](https://github.com/ClickHouse/ClickHouse/issues/23757): Avoid possible "Cannot schedule a task" error (in case some exception had occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL CATEGORY + +* Backported in [#23347](https://github.com/ClickHouse/ClickHouse/issues/23347):. [#23334](https://github.com/ClickHouse/ClickHouse/pull/23334) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.5.3.1-prestable.md b/docs/changelogs/v21.5.3.1-prestable.md new file mode 100644 index 00000000000..24b29d1495e --- /dev/null +++ b/docs/changelogs/v21.5.3.1-prestable.md @@ -0,0 +1,9 @@ +### ClickHouse release v21.5.3.1-prestable FIXME as compared to v21.5.2.25-prestable + +#### Bug Fix +* Backported in [#23818](https://github.com/ClickHouse/ClickHouse/issues/23818): Fix crash when `PREWHERE` and row policy filter are both in effect with empty result. [#23763](https://github.com/ClickHouse/ClickHouse/pull/23763) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#23798](https://github.com/ClickHouse/ClickHouse/issues/23798): Fixed remote JDBC bridge timeout connection issue. Closes [#9609](https://github.com/ClickHouse/ClickHouse/issues/9609). [#23771](https://github.com/ClickHouse/ClickHouse/pull/23771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#23815](https://github.com/ClickHouse/ClickHouse/issues/23815): Fix `CLEAR COLUMN` does not work when it is referenced by materialized view. Close [#23764](https://github.com/ClickHouse/ClickHouse/issues/23764). [#23781](https://github.com/ClickHouse/ClickHouse/pull/23781) ([flynn](https://github.com/ucasfl)). +* Backported in [#23832](https://github.com/ClickHouse/ClickHouse/issues/23832): Fix error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` and `use_hedged_requests`. 
Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#23927](https://github.com/ClickHouse/ClickHouse/issues/23927): HashedDictionary complex key update field initial load fix. Closes [#23800](https://github.com/ClickHouse/ClickHouse/issues/23800). [#23824](https://github.com/ClickHouse/ClickHouse/pull/23824) ([Maksim Kita](https://github.com/kitaisreal)). + diff --git a/docs/changelogs/v21.5.4.6-prestable.md b/docs/changelogs/v21.5.4.6-prestable.md new file mode 100644 index 00000000000..c9e040309cc --- /dev/null +++ b/docs/changelogs/v21.5.4.6-prestable.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.5.4.6-prestable FIXME as compared to v21.5.3.1-prestable + +#### Bug Fix +* Backported in [#23972](https://github.com/ClickHouse/ClickHouse/issues/23972): Fixed a bug in recovery of staled `ReplicatedMergeTree` replica. Some metadata updates could be ignored by staled replica if `ALTER` query was executed during downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#23956](https://github.com/ClickHouse/ClickHouse/issues/23956): Fix keys metrics accounting for CACHE() dictionary with duplicates in the source (leads to `DictCacheKeysRequestedMiss` overflows). [#23929](https://github.com/ClickHouse/ClickHouse/pull/23929) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#24001](https://github.com/ClickHouse/ClickHouse/issues/24001): Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). [#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#24033](https://github.com/ClickHouse/ClickHouse/issues/24033): Fix crash in MergeJoin, close [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). [#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.5.5.12-stable.md b/docs/changelogs/v21.5.5.12-stable.md new file mode 100644 index 00000000000..b49e2c60b08 --- /dev/null +++ b/docs/changelogs/v21.5.5.12-stable.md @@ -0,0 +1,11 @@ +### ClickHouse release v21.5.5.12-stable FIXME as compared to v21.5.4.6-prestable + +#### Bug Fix +* Backported in [#24156](https://github.com/ClickHouse/ClickHouse/issues/24156): Kafka storage may support parquet format messages. [#23412](https://github.com/ClickHouse/ClickHouse/pull/23412) ([Chao Ma](https://github.com/godliness)). +* Backported in [#24187](https://github.com/ClickHouse/ClickHouse/issues/24187): Some `ALTER PARTITION` queries might cause `Part A intersects previous part B` and `Unexpected merged part C intersecting drop range D` errors in replication queue. It's fixed. Fixes [#23296](https://github.com/ClickHouse/ClickHouse/issues/23296). [#23997](https://github.com/ClickHouse/ClickHouse/pull/23997) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24304](https://github.com/ClickHouse/ClickHouse/issues/24304): Fixed using const `DateTime` value vs `DateTime64` column in WHERE. ... [#24100](https://github.com/ClickHouse/ClickHouse/pull/24100) ([Vasily Nemkov](https://github.com/Enmk)). +* Backported in [#24142](https://github.com/ClickHouse/ClickHouse/issues/24142): Bug: explain pipeline with` select xxx final `shows wrong pipeline: ``` dell123 :) explain pipeline select z from prewhere_move_select_final final;. [#24116](https://github.com/ClickHouse/ClickHouse/pull/24116) ([hexiaoting](https://github.com/hexiaoting)). 
+* Backported in [#24214](https://github.com/ClickHouse/ClickHouse/issues/24214): Fix race condition which could happen in RBAC under a heavy load. This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090), [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134),. [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#24242](https://github.com/ClickHouse/ClickHouse/issues/24242): Fix abnormal server termination due to hdfs becoming not accessible during query execution. Closes [#24117](https://github.com/ClickHouse/ClickHouse/issues/24117). [#24191](https://github.com/ClickHouse/ClickHouse/pull/24191) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#24239](https://github.com/ClickHouse/ClickHouse/issues/24239): Fix wrong typo at StorageMemory, this bug was introduced at [#15127](https://github.com/ClickHouse/ClickHouse/issues/15127), now fixed, Closes [#24192](https://github.com/ClickHouse/ClickHouse/issues/24192). [#24193](https://github.com/ClickHouse/ClickHouse/pull/24193) ([张中南](https://github.com/plugine)). + diff --git a/docs/changelogs/v21.5.6.6-stable.md b/docs/changelogs/v21.5.6.6-stable.md new file mode 100644 index 00000000000..e6160dfa784 --- /dev/null +++ b/docs/changelogs/v21.5.6.6-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.5.6.6-stable FIXME as compared to v21.5.5.12-stable + +#### Bug Fix +* Backported in [#24170](https://github.com/ClickHouse/ClickHouse/issues/24170): Fix a rare bug that could lead to a partially initialized table that can serve write requests (insert/alter/so on). Now such tables will be in readonly mode. [#24122](https://github.com/ClickHouse/ClickHouse/pull/24122) ([alesapin](https://github.com/alesapin)). +* Backported in [#24332](https://github.com/ClickHouse/ClickHouse/issues/24332): Set `max_threads = 1` to fix mutation fail of StorageMemory. 
Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasfl)). +* Backported in [#24382](https://github.com/ClickHouse/ClickHouse/issues/24382): Allow empty HTTP headers. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)). +* Backported in [#24351](https://github.com/ClickHouse/ClickHouse/issues/24351): Fixed a bug in moving Materialized View from Ordinary to Atomic database (`RENAME TABLE` query). Now inner table is moved to new database together with Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24509](https://github.com/ClickHouse/ClickHouse/issues/24509): Fix drop partition with intersect fake parts. In rare cases there might be parts with mutation version greater than current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24540](https://github.com/ClickHouse/ClickHouse/issues/24540): Fix incorrect monotonicity of toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422) . This bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/5212 , and was exposed later by smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24557](https://github.com/ClickHouse/ClickHouse/issues/24557): Fixed the behavior when query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on server with extremely little amount of RAM. 
[#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#24598](https://github.com/ClickHouse/ClickHouse/issues/24598): Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#24601](https://github.com/ClickHouse/ClickHouse/issues/24601): Enable reading of subcolumns for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)). + +#### NO CL CATEGORY + +* Backported in [#24750](https://github.com/ClickHouse/ClickHouse/issues/24750):. [#23276](https://github.com/ClickHouse/ClickHouse/pull/23276) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.5.7.9-stable.md b/docs/changelogs/v21.5.7.9-stable.md new file mode 100644 index 00000000000..9586e398547 --- /dev/null +++ b/docs/changelogs/v21.5.7.9-stable.md @@ -0,0 +1,42 @@ +### ClickHouse release v21.5.7.9-stable FIXME as compared to v21.5.6.6-stable + +#### Improvement +* Backported in [#24581](https://github.com/ClickHouse/ClickHouse/issues/24581): Disable min_bytes_to_use_mmap_io by default. [#23322](https://github.com/ClickHouse/ClickHouse/pull/23322) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#25222](https://github.com/ClickHouse/ClickHouse/issues/25222): Here will be listed all the bugs that I am gonna to fix in this PR. [#23518](https://github.com/ClickHouse/ClickHouse/pull/23518) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#24793](https://github.com/ClickHouse/ClickHouse/issues/24793): Avoid hiding errors like `Limit for rows or bytes to read exceeded` for scalar subqueries. [#24545](https://github.com/ClickHouse/ClickHouse/pull/24545) ([nvartolomei](https://github.com/nvartolomei)). 
+ +#### Bug Fix +* Backported in [#24928](https://github.com/ClickHouse/ClickHouse/issues/24928): Fix implementation of connection pool of PostgreSQL engine. Closes [#23897](https://github.com/ClickHouse/ClickHouse/issues/23897). [#23909](https://github.com/ClickHouse/ClickHouse/pull/23909) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#24568](https://github.com/ClickHouse/ClickHouse/issues/24568): Use old modulo function version when used in partition key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#24726](https://github.com/ClickHouse/ClickHouse/issues/24726): In "multipart/form-data" message consider the CRLF preceding a boundary as part of it. Fixes [#23905](https://github.com/ClickHouse/ClickHouse/issues/23905). [#24399](https://github.com/ClickHouse/ClickHouse/pull/24399) ([Ivan](https://github.com/abyss7)). +* Backported in [#24824](https://github.com/ClickHouse/ClickHouse/issues/24824): - Fixed the deadlock that can happen during LDAP role (re)mapping, when LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)). +* Backported in [#24772](https://github.com/ClickHouse/ClickHouse/issues/24772): Fix bug which can lead to ZooKeeper client hung inside clickhouse-server. [#24721](https://github.com/ClickHouse/ClickHouse/pull/24721) ([alesapin](https://github.com/alesapin)). +* Backported in [#24935](https://github.com/ClickHouse/ClickHouse/issues/24935): - If ZooKeeper connection was lost and replica was cloned after restoring the connection, its replication queue might contain outdated entries. It's fixed. - Fixed crash when replication queue contains intersecting virtual parts. It may rarely happen if some data part was lost. Print error in log instead of terminating. 
[#24777](https://github.com/ClickHouse/ClickHouse/pull/24777) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24854](https://github.com/ClickHouse/ClickHouse/issues/24854): Fix bug when exception `Mutation was killed` can be thrown to the client on mutation wait when mutation not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)). +* Backported in [#24986](https://github.com/ClickHouse/ClickHouse/issues/24986): Allow NULL values in postgresql protocol. Closes [#22622](https://github.com/ClickHouse/ClickHouse/issues/22622). [#24857](https://github.com/ClickHouse/ClickHouse/pull/24857) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#25481](https://github.com/ClickHouse/ClickHouse/issues/25481): Fix "Missing columns" exception when joining Distributed Materialized View. [#24870](https://github.com/ClickHouse/ClickHouse/pull/24870) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#24915](https://github.com/ClickHouse/ClickHouse/issues/24915): Fix extremely rare bug on low-memory servers which can lead to the inability to perform merges without restart. Possibly fixes [#24603](https://github.com/ClickHouse/ClickHouse/issues/24603). [#24872](https://github.com/ClickHouse/ClickHouse/pull/24872) ([alesapin](https://github.com/alesapin)). +* Backported in [#25183](https://github.com/ClickHouse/ClickHouse/issues/25183): Fixed bug with declaring S3 disk at root of bucket. Earlier, it reported an error: ``` [heather] 2021.05.10 02:11:11.932234 [ 72790 ] {2ff80b7b-ec53-41cb-ac35-19bb390e1759} executeQuery: Code: 36, e.displayText() = DB::Exception: Key name is empty in path style S3 URI: (http://172.17.0.2/bucket/) (version 21.6.1.1) (from 127.0.0.1:47994) (in query: SELECT policy_name FROM system.storage_policies), Stack trace (when copying this message, always include the lines below):. 
[#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#24950](https://github.com/ClickHouse/ClickHouse/issues/24950): Fix possible heap-buffer-overflow in Arrow. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25024](https://github.com/ClickHouse/ClickHouse/issues/25024): Fix limit/offset settings for distributed queries (ignore on the remote nodes). [#24940](https://github.com/ClickHouse/ClickHouse/pull/24940) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#25048](https://github.com/ClickHouse/ClickHouse/issues/25048): Fix extremely rare error `Tagging already tagged part` in replication queue during concurrent `alter move/replace partition`. Possibly fixes [#22142](https://github.com/ClickHouse/ClickHouse/issues/22142). [#24961](https://github.com/ClickHouse/ClickHouse/pull/24961) ([alesapin](https://github.com/alesapin)). +* Backported in [#25365](https://github.com/ClickHouse/ClickHouse/issues/25365): Fix serialization of splitted nested messages in Protobuf format. This PR fixes [#24647](https://github.com/ClickHouse/ClickHouse/issues/24647). [#25000](https://github.com/ClickHouse/ClickHouse/pull/25000) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#25101](https://github.com/ClickHouse/ClickHouse/issues/25101): Distinguish KILL MUTATION for different tables (fixes unexpected `Cancelled mutating parts` error). [#25025](https://github.com/ClickHouse/ClickHouse/pull/25025) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#25558](https://github.com/ClickHouse/ClickHouse/issues/25558): Fix bug which allows creating tables with columns referencing themselves like `a UInt32 ALIAS a + 1` or `b UInt32 MATERIALIZED b`. Fixes [#24910](https://github.com/ClickHouse/ClickHouse/issues/24910), [#24292](https://github.com/ClickHouse/ClickHouse/issues/24292). 
[#25059](https://github.com/ClickHouse/ClickHouse/pull/25059) ([alesapin](https://github.com/alesapin)). +* Backported in [#25104](https://github.com/ClickHouse/ClickHouse/issues/25104): Fix bug with constant maps in mapContains that lead to error `empty column was returned by function mapContains`. Closes [#25077](https://github.com/ClickHouse/ClickHouse/issues/25077). [#25080](https://github.com/ClickHouse/ClickHouse/pull/25080) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25141](https://github.com/ClickHouse/ClickHouse/issues/25141): Fix crash in query with cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25214](https://github.com/ClickHouse/ClickHouse/issues/25214): Fixed an error which occurred while inserting a subset of columns using CSVWithNames format. Fixes [#25129](https://github.com/ClickHouse/ClickHouse/issues/25129). [#25169](https://github.com/ClickHouse/ClickHouse/pull/25169) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#25353](https://github.com/ClickHouse/ClickHouse/issues/25353): Fix TOCTOU error in installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25471](https://github.com/ClickHouse/ClickHouse/issues/25471): Fix joinGetOrNull with not-nullable columns. This fixes [#24261](https://github.com/ClickHouse/ClickHouse/issues/24261). [#25288](https://github.com/ClickHouse/ClickHouse/pull/25288) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#25357](https://github.com/ClickHouse/ClickHouse/issues/25357): Fix Logical Error Cannot sum Array/Tuple in min/maxMap. [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Backported in [#25438](https://github.com/ClickHouse/ClickHouse/issues/25438): Support `SimpleAggregateFunction(LowCardinality)` for `SummingMergeTree`. Fixes [#25134](https://github.com/ClickHouse/ClickHouse/issues/25134). [#25300](https://github.com/ClickHouse/ClickHouse/pull/25300) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25362](https://github.com/ClickHouse/ClickHouse/issues/25362): On ZooKeeper connection loss `ReplicatedMergeTree` table might wait for background operations to complete before trying to reconnect. It's fixed, now background operations are stopped forcefully. [#25306](https://github.com/ClickHouse/ClickHouse/pull/25306) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25386](https://github.com/ClickHouse/ClickHouse/issues/25386): Fix the possibility of non-deterministic behaviour of the `quantileDeterministic` function and similar. This closes [#20480](https://github.com/ClickHouse/ClickHouse/issues/20480). [#25313](https://github.com/ClickHouse/ClickHouse/pull/25313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25456](https://github.com/ClickHouse/ClickHouse/issues/25456): Fix lost `WHERE` condition in expression-push-down optimization of query plan (setting `query_plan_filter_push_down = 1` by default). Fixes [#25368](https://github.com/ClickHouse/ClickHouse/issues/25368). [#25370](https://github.com/ClickHouse/ClickHouse/pull/25370) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25408](https://github.com/ClickHouse/ClickHouse/issues/25408): Fix `REPLACE` column transformer when used in DDL by correctly quoting the formated query. This fixes [#23925](https://github.com/ClickHouse/ClickHouse/issues/23925). [#25391](https://github.com/ClickHouse/ClickHouse/pull/25391) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#25504](https://github.com/ClickHouse/ClickHouse/issues/25504): Fix segfault when sharding_key is absent in task config for copier. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#24721](https://github.com/ClickHouse/ClickHouse/issues/24721) to 21.5: Remove endless `wait` from ZooKeeper client"'. [#24798](https://github.com/ClickHouse/ClickHouse/pull/24798) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v21.5.8.21-stable.md b/docs/changelogs/v21.5.8.21-stable.md new file mode 100644 index 00000000000..4d9ffd687eb --- /dev/null +++ b/docs/changelogs/v21.5.8.21-stable.md @@ -0,0 +1,13 @@ +### ClickHouse release v21.5.8.21-stable FIXME as compared to v21.5.7.9-stable + +#### Bug Fix +* Backported in [#25849](https://github.com/ClickHouse/ClickHouse/issues/25849): `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. Inference of the common type for `Date` and `DateTime` also was not using the corresponding timezone. It affected the results of function `if` and array construction. Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#25679](https://github.com/ClickHouse/ClickHouse/issues/25679): Fixed bug in deserialization of random generator state which might cause some data types such as `AggregateFunction(groupArraySample(N), T)` to behave in a non-deterministic way. [#24538](https://github.com/ClickHouse/ClickHouse/pull/24538) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#25556](https://github.com/ClickHouse/ClickHouse/issues/25556): Fix potential crash when calculating aggregate function states by aggregation of aggregate function states of other aggregate functions (not a practical use case). See [#24523](https://github.com/ClickHouse/ClickHouse/issues/24523). [#25015](https://github.com/ClickHouse/ClickHouse/pull/25015) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25767](https://github.com/ClickHouse/ClickHouse/issues/25767): Fix assertion in PREWHERE with non-uint8 type, close [#19589](https://github.com/ClickHouse/ClickHouse/issues/19589). [#25484](https://github.com/ClickHouse/ClickHouse/pull/25484) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#25636](https://github.com/ClickHouse/ClickHouse/issues/25636): Fix wrong totals for query `WITH TOTALS` and `WITH FILL`. Fixes [#20872](https://github.com/ClickHouse/ClickHouse/issues/20872). [#25539](https://github.com/ClickHouse/ClickHouse/pull/25539) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#25653](https://github.com/ClickHouse/ClickHouse/issues/25653): Fix null pointer dereference in `EXPLAIN AST` without query. [#25631](https://github.com/ClickHouse/ClickHouse/pull/25631) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25715](https://github.com/ClickHouse/ClickHouse/issues/25715): `REPLACE PARTITION` might be ignored in rare cases if the source partition was empty. It's fixed. Fixes [#24869](https://github.com/ClickHouse/ClickHouse/issues/24869). [#25665](https://github.com/ClickHouse/ClickHouse/pull/25665) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25695](https://github.com/ClickHouse/ClickHouse/issues/25695): Fixed `No such file or directory` error on moving `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). 
[#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25754](https://github.com/ClickHouse/ClickHouse/issues/25754): Fix data race when querying `system.clusters` while reloading the cluster configuration at the same time. [#25737](https://github.com/ClickHouse/ClickHouse/pull/25737) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v21.5.9.4-stable.md b/docs/changelogs/v21.5.9.4-stable.md new file mode 100644 index 00000000000..17ef067194b --- /dev/null +++ b/docs/changelogs/v21.5.9.4-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v21.5.9.4-stable FIXME as compared to v21.5.8.21-stable + +#### Bug Fix +* Backported in [#25958](https://github.com/ClickHouse/ClickHouse/issues/25958): Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)). +* Backported in [#26144](https://github.com/ClickHouse/ClickHouse/issues/26144): Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.6.1.6891-prestable.md b/docs/changelogs/v21.6.1.6891-prestable.md new file mode 100644 index 00000000000..7750cac32eb --- /dev/null +++ b/docs/changelogs/v21.6.1.6891-prestable.md @@ -0,0 +1,159 @@ +### ClickHouse release v21.6.1.6891-prestable FIXME as compared to v21.5.1.6601-prestable + +#### New Feature +* Add projection support for MergeTree* tables. [#20202](https://github.com/ClickHouse/ClickHouse/pull/20202) ([Amos Bird](https://github.com/amosbird)). +* Add back `indexHint` function. This is for [#21238](https://github.com/ClickHouse/ClickHouse/issues/21238) . 
This reverts https://github.com/ClickHouse/ClickHouse/pull/9542 . This fixes [#9540](https://github.com/ClickHouse/ClickHouse/issues/9540) . [#21304](https://github.com/ClickHouse/ClickHouse/pull/21304) ([Amos Bird](https://github.com/amosbird)). +* 1. Add aggregate function sumCount. This function returns a tuple of two fields: sum and count. [#21337](https://github.com/ClickHouse/ClickHouse/pull/21337) ([hexiaoting](https://github.com/hexiaoting)). +* Added less secure IMDS credentials provider for S3 which works under docker correctly. [#21852](https://github.com/ClickHouse/ClickHouse/pull/21852) ([Vladimir Chebotarev](https://github.com/excitoon)). +* - New aggregate function `deltaSumTimestamp` for summing the difference between consecutive rows while maintaining ordering during merge by storing timestamps. [#21888](https://github.com/ClickHouse/ClickHouse/pull/21888) ([Russ Frank](https://github.com/rf)). +* - LDAP: implemented user DN detection functionality to use when mapping Active Directory groups to ClickHouse roles. [#22228](https://github.com/ClickHouse/ClickHouse/pull/22228) ([Denis Glazachev](https://github.com/traceon)). +* Introduce a new function: arrayProduct which accept an array as the parameter, and return the product of all the elements in array. Close issue: [#21613](https://github.com/ClickHouse/ClickHouse/issues/21613). [#22242](https://github.com/ClickHouse/ClickHouse/pull/22242) ([hexiaoting](https://github.com/hexiaoting)). +* Add setting `indexes` (boolean, disabled by default) to `EXPLAIN PIPELINE` query. When enabled, shows used indexes, number of filtered parts and granules for every index applied. Supported for `MergeTree*` tables. [#22352](https://github.com/ClickHouse/ClickHouse/pull/22352) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add setting `json` (boolean, 0 by default) for `EXPLAIN PLAN` query. When enabled, query output will be a single `JSON` row. 
It is recommended to use `TSVRaw` format to avoid unnecessary escaping. [#23082](https://github.com/ClickHouse/ClickHouse/pull/23082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Added `SYSTEM QUERY RELOAD MODEL`, `SYSTEM QUERY RELOAD MODELS`. Closes [#18722](https://github.com/ClickHouse/ClickHouse/issues/18722). [#23182](https://github.com/ClickHouse/ClickHouse/pull/23182) ([Maksim Kita](https://github.com/kitaisreal)). +* Made progress bar for LocalServer and united it for Client and Local. [#23196](https://github.com/ClickHouse/ClickHouse/pull/23196) ([Egor Savin](https://github.com/Amesaru)). +* Support DDL dictionaries for DatabaseMemory. Closes [#22354](https://github.com/ClickHouse/ClickHouse/issues/22354). Added support for `DETACH DICTIONARY PERMANENTLY`. Added support for `EXCHANGE DICTIONARIES` for Atomic database engine. Added support for moving dictionaries between databases using `RENAME DICTIONARY`. [#23436](https://github.com/ClickHouse/ClickHouse/pull/23436) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow globs `{...}`, which act like shards, and failover options with separator `|` for URL table function. Closes [#17181](https://github.com/ClickHouse/ClickHouse/issues/17181). [#23446](https://github.com/ClickHouse/ClickHouse/pull/23446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If `insert_null_as_default` = 1, insert default values instead of NULL in `INSERT ... SELECT` and `INSERT ... SELECT ... UNION ALL ...` queries. Closes [#22832](https://github.com/ClickHouse/ClickHouse/issues/22832). [#23524](https://github.com/ClickHouse/ClickHouse/pull/23524) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Implement table comments. closes [#23225](https://github.com/ClickHouse/ClickHouse/issues/23225). [#23548](https://github.com/ClickHouse/ClickHouse/pull/23548) ([flynn](https://github.com/ucasfl)). 
+* Introduce a new function: arrayProduct which accept an array as the parameter, and return the product of all the elements in array. Closes [#21613](https://github.com/ClickHouse/ClickHouse/issues/21613). [#23782](https://github.com/ClickHouse/ClickHouse/pull/23782) ([Maksim Kita](https://github.com/kitaisreal)). +* Add postgres-like cast operator (`::`). E.g.: `[1, 2]::Array(UInt8)`, `0.1::Decimal(4, 4)`, `number::UInt16`. [#23871](https://github.com/ClickHouse/ClickHouse/pull/23871) ([Anton Popov](https://github.com/CurtizJ)). +* ... [#23910](https://github.com/ClickHouse/ClickHouse/pull/23910) ([Xiang Zhou](https://github.com/javainthinking)). +* Add function splitByRegexp. [#24077](https://github.com/ClickHouse/ClickHouse/pull/24077) ([abel-cheng](https://github.com/abel-cheng)). +* Add `thread_name` column in `system.stack_trace`. This closes [#23256](https://github.com/ClickHouse/ClickHouse/issues/23256). [#24124](https://github.com/ClickHouse/ClickHouse/pull/24124) ([abel-cheng](https://github.com/abel-cheng)). + +#### Performance Improvement +* Enable `compile_expressions` setting by default. When this setting enabled, compositions of simple functions and operators will be compiled to native code with LLVM at runtime. [#8482](https://github.com/ClickHouse/ClickHouse/pull/8482) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ORC input format reading by stripe instead of reading entire table into memory by once which is cost memory when file size is huge. [#23102](https://github.com/ClickHouse/ClickHouse/pull/23102) ([Chao Ma](https://github.com/godliness)). +* Update `re2` library. Performance of regular expressions matching is improved. Also this PR adds compatibility with gcc-11. [#24196](https://github.com/ClickHouse/ClickHouse/pull/24196) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Support Array data type for inserting and selecting data in Arrow, Parquet and ORC formats. 
[#21770](https://github.com/ClickHouse/ClickHouse/pull/21770) ([taylor12805](https://github.com/taylor12805)). +* Add settings `external_storage_max_read_rows` and `external_storage_max_read_bytes` for MySQL table engine, dictionary source and MaterializeMySQL minor data fetches. [#22697](https://github.com/ClickHouse/ClickHouse/pull/22697) ([TCeason](https://github.com/TCeason)). +* Retries on HTTP connection drops in S3. [#22988](https://github.com/ClickHouse/ClickHouse/pull/22988) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix the case when a progress bar in interactive mode in clickhouse-client that appears in the middle of the data may rewrite some parts of visible data in terminal. This closes [#19283](https://github.com/ClickHouse/ClickHouse/issues/19283). [#23050](https://github.com/ClickHouse/ClickHouse/pull/23050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added possibility to restore MergeTree parts to 'detached' directory for DiskS3. [#23112](https://github.com/ClickHouse/ClickHouse/pull/23112) ([Pavel Kovalenko](https://github.com/Jokser)). +* Skip unavailable replicas when writing to distributed tables. [#23152](https://github.com/ClickHouse/ClickHouse/pull/23152) ([Amos Bird](https://github.com/amosbird)). +* Support LowCardinality nullability with `join_use_nulls`, close [#15101](https://github.com/ClickHouse/ClickHouse/issues/15101). [#23237](https://github.com/ClickHouse/ClickHouse/pull/23237) ([Vladimir C](https://github.com/vdimir)). +* Disable settings `use_hedged_requests` and `async_socket_for_remote` because there is evidence that they may cause issues. [#23261](https://github.com/ClickHouse/ClickHouse/pull/23261) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `quantile(s)TDigest`. Added special handling of singleton centroids according to tdunning/t-digest 3.2+. Also a bug with over-compression of centroids in implementation of earlier version of the algorithm was fixed. 
[#23314](https://github.com/ClickHouse/ClickHouse/pull/23314) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Allow user to specify empty string instead of database name for `MySQL` storage. Default database will be used for queries. In previous versions it was working for SELECT queries and now support for INSERT was also added. This closes [#19281](https://github.com/ClickHouse/ClickHouse/issues/19281). This can be useful working with `Sphinx` or other MySQL-compatible foreign databases. [#23319](https://github.com/ClickHouse/ClickHouse/pull/23319) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable min_bytes_to_use_mmap_io by default. [#23322](https://github.com/ClickHouse/ClickHouse/pull/23322) ([Azat Khuzhin](https://github.com/azat)). +* If user applied a misconfiguration by mistakenly setting `max_distributed_connections` to value zero, every query to a `Distributed` table will throw exception with a message containing "logical error". But it's really an expected behaviour, not a logical error, so the exception message was slightly incorrect. It also triggered checks in our CI environment that ensure that no logical errors ever happen. Instead we will treat `max_distributed_connections` misconfigured to zero as the minimum possible value (one). [#23348](https://github.com/ClickHouse/ClickHouse/pull/23348) ([Azat Khuzhin](https://github.com/azat)). +* Keep default timezone on DateTime operations if it was not provided explicitly. For example, if you add one second to a value of `DateTime` type without timezone it will remain `DateTime` without timezone. In previous versions the value of default timezone was placed to the returned data type explicitly so it becomes DateTime('something'). This closes [#4854](https://github.com/ClickHouse/ClickHouse/issues/4854). [#23392](https://github.com/ClickHouse/ClickHouse/pull/23392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Previously, MySQL 5.7.9 was not supported due to SQL incompatibility. Now leave MySQL parameter verification to the MaterializeMySQL. [#23413](https://github.com/ClickHouse/ClickHouse/pull/23413) ([TCeason](https://github.com/TCeason)). +* Possibility to change S3 disk settings in runtime via new `SYSTEM RESTART DISK` SQL command. [#23429](https://github.com/ClickHouse/ClickHouse/pull/23429) ([Pavel Kovalenko](https://github.com/Jokser)). +* Respect lock_acquire_timeout_for_background_operations for OPTIMIZE. [#23623](https://github.com/ClickHouse/ClickHouse/pull/23623) ([Azat Khuzhin](https://github.com/azat)). +* Make big integers production ready. Add support for `UInt128` data type. Fix known issues with the `Decimal256` data type. Support big integers in dictionaries. Support `gcd`/`lcm` functions for big integers. Support big integers in array search and conditional functions. Support `LowCardinality(UUID)`. Support big integers in `generateRandom` table function and `clickhouse-obfuscator`. Fix error with returning `UUID` from scalar subqueries. This fixes [#7834](https://github.com/ClickHouse/ClickHouse/issues/7834). This fixes [#23936](https://github.com/ClickHouse/ClickHouse/issues/23936). This fixes [#4176](https://github.com/ClickHouse/ClickHouse/issues/4176). This fixes [#24018](https://github.com/ClickHouse/ClickHouse/issues/24018). This fixes [#17828](https://github.com/ClickHouse/ClickHouse/issues/17828). Backward incompatible change: values of `UUID` type cannot be compared with integer. For example, instead of writing `uuid != 0` type `uuid != '00000000-0000-0000-0000-000000000000'`. [#23631](https://github.com/ClickHouse/ClickHouse/pull/23631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `_partition_value` virtual column to MergeTree table family. It can be used to prune partition in a deterministic way. It's needed to implement partition matcher for mutations. 
[#23673](https://github.com/ClickHouse/ClickHouse/pull/23673) ([Amos Bird](https://github.com/amosbird)). +* Enable `async_socket_for_remote` by default. [#23683](https://github.com/ClickHouse/ClickHouse/pull/23683) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* When there is some `ReplicatedMergeTree` tables whose zookeeper is expired, it will throw the error below when we select the meta data of some table from `system.tables` with `select_sequential_consistency` is enabled: `Session expired (Session expired): While executing Tables`. [#23793](https://github.com/ClickHouse/ClickHouse/pull/23793) ([Fuwang Hu](https://github.com/fuwhu)). +* Added `region` parameter for S3 storage and disk. [#23846](https://github.com/ClickHouse/ClickHouse/pull/23846) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Allow configuring different log levels for different logging channels. Closes [#19569](https://github.com/ClickHouse/ClickHouse/issues/19569). [#23857](https://github.com/ClickHouse/ClickHouse/pull/23857) ([filimonov](https://github.com/filimonov)). +* Add `broken_data_files`/`broken_data_compressed_bytes` into `system.distribution_queue`. Add metric for number of files for asynchronous insertion into Distributed tables that has been marked as broken (`BrokenDistributedFilesToInsert`). [#23885](https://github.com/ClickHouse/ClickHouse/pull/23885) ([Azat Khuzhin](https://github.com/azat)). +* Allow to add specific queue settings via table setting `rabbitmq_queue_settings_list`. (Closes [#23737](https://github.com/ClickHouse/ClickHouse/issues/23737) and [#23918](https://github.com/ClickHouse/ClickHouse/issues/23918)). Allow user to control all RabbitMQ setup: if table setting `rabbitmq_queue_consume` is set to `1` - RabbitMQ table engine will only connect to specified queue and will not perform any RabbitMQ consumer-side setup like declaring exchange, queues, bindings. (Closes [#21757](https://github.com/ClickHouse/ClickHouse/issues/21757)). 
Add proper cleanup when RabbitMQ table is dropped - delete queues, which the table has declared and all bound exchanges - if they were created by the table. [#23887](https://github.com/ClickHouse/ClickHouse/pull/23887) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Measure found rate (the percentage for which the value was found) for dictionaries (see `found_rate` in `system.dictionaries`). [#23916](https://github.com/ClickHouse/ClickHouse/pull/23916) ([Azat Khuzhin](https://github.com/azat)). +* Add hints for Enum names. Closes [#17112](https://github.com/ClickHouse/ClickHouse/issues/17112). [#23919](https://github.com/ClickHouse/ClickHouse/pull/23919) ([flynn](https://github.com/ucasfl)). +* Add support for HTTP compression (determined by `Content-Encoding` HTTP header) in `http` dictionary source. This fixes [#8912](https://github.com/ClickHouse/ClickHouse/issues/8912). [#23946](https://github.com/ClickHouse/ClickHouse/pull/23946) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Preallocate support for hashed/sparse_hashed dictionaries. [#23979](https://github.com/ClickHouse/ClickHouse/pull/23979) ([Azat Khuzhin](https://github.com/azat)). +* Support specifying table schema for postgresql dictionary source. Closes [#23958](https://github.com/ClickHouse/ClickHouse/issues/23958). [#23980](https://github.com/ClickHouse/ClickHouse/pull/23980) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Log information about OS name, kernel version and CPU architecture on server startup. [#23988](https://github.com/ClickHouse/ClickHouse/pull/23988) ([Azat Khuzhin](https://github.com/azat)). +* enable DateTime64 to be a version column in ReplacingMergeTree. [#23992](https://github.com/ClickHouse/ClickHouse/pull/23992) ([kevin wan](https://github.com/MaxWk)). +* Add support for `ORDER BY WITH FILL` with `DateTime64`. [#24016](https://github.com/ClickHouse/ClickHouse/pull/24016) ([kevin wan](https://github.com/MaxWk)). 
+* Now `prefer_column_name_to_alias = 1` will also favor column names for `group by`, `having` and `order by`. This fixes [#23882](https://github.com/ClickHouse/ClickHouse/issues/23882). [#24022](https://github.com/ClickHouse/ClickHouse/pull/24022) ([Amos Bird](https://github.com/amosbird)). +* Do not acquire lock for total_bytes/total_rows for Buffer engine. [#24066](https://github.com/ClickHouse/ClickHouse/pull/24066) ([Azat Khuzhin](https://github.com/azat)). +* Flush Buffer tables before shutting down tables (within one database), to avoid discarding blocks due to underlying table had been already detached (and `Destination table default.a_data_01870 doesn't exist. Block of data is discarded` error in the log). [#24067](https://github.com/ClickHouse/ClickHouse/pull/24067) ([Azat Khuzhin](https://github.com/azat)). +* Preserve dictionaries until storage shutdown (this will avoid possible `external dictionary 'DICT' not found` errors at server shutdown during final Buffer flush). [#24068](https://github.com/ClickHouse/ClickHouse/pull/24068) ([Azat Khuzhin](https://github.com/azat)). +* Update zstd to v1.5.0. [#24135](https://github.com/ClickHouse/ClickHouse/pull/24135) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when memory allocation fails in simdjson. https://github.com/simdjson/simdjson/pull/1567 . Mark as improvement because it's a rare bug. [#24147](https://github.com/ClickHouse/ClickHouse/pull/24147) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix +* This PR fixes a crash on shutdown which happened because of currentConnections() could return zero while some connections were still alive. [#23154](https://github.com/ClickHouse/ClickHouse/pull/23154) ([Vitaly Baranov](https://github.com/vitlibar)). +* QueryAliasVisitor to prefer alias for ASTWithAlias if subquery was optimized to constant. Fixes [#22924](https://github.com/ClickHouse/ClickHouse/issues/22924). Fixes [#10401](https://github.com/ClickHouse/ClickHouse/issues/10401). 
[#23191](https://github.com/ClickHouse/ClickHouse/pull/23191) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed `Not found column` error when selecting from `MaterializeMySQL` with condition on key column. Fixes [#22432](https://github.com/ClickHouse/ClickHouse/issues/22432). [#23200](https://github.com/ClickHouse/ClickHouse/pull/23200) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed the behavior when disabling `input_format_with_names_use_header ` setting discards all the input with CSVWithNames format. This fixes [#22406](https://github.com/ClickHouse/ClickHouse/issues/22406). [#23202](https://github.com/ClickHouse/ClickHouse/pull/23202) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add type conversion for optimize_skip_unused_shards_rewrite_in (fixes `use-of-uninitialized-value` with `optimize_skip_unused_shards_rewrite_in`). [#23219](https://github.com/ClickHouse/ClickHouse/pull/23219) ([Azat Khuzhin](https://github.com/azat)). +* Fixed simple key dictionary from DDL creation if primary key is not first attribute. Fixes [#23236](https://github.com/ClickHouse/ClickHouse/issues/23236). [#23262](https://github.com/ClickHouse/ClickHouse/pull/23262) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed very rare (distributed) race condition between creation and removal of ReplicatedMergeTree tables. It might cause exceptions like `node doesn't exist` on attempt to create replicated table. Fixes [#21419](https://github.com/ClickHouse/ClickHouse/issues/21419). [#23294](https://github.com/ClickHouse/ClickHouse/pull/23294) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of deduplication window. [#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fix possible crash in case if `unknown packet` was received from remote query (with `async_socket_for_remote` enabled). Maybe fixes [#21167](https://github.com/ClickHouse/ClickHouse/issues/21167). [#23309](https://github.com/ClickHouse/ClickHouse/pull/23309) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't relax NOT conditions during partition pruning. This fixes [#23305](https://github.com/ClickHouse/ClickHouse/issues/23305) and [#21539](https://github.com/ClickHouse/ClickHouse/issues/21539). [#23310](https://github.com/ClickHouse/ClickHouse/pull/23310) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in dict join with join_algorithm = 'auto'. Close [#23002](https://github.com/ClickHouse/ClickHouse/issues/23002). [#23312](https://github.com/ClickHouse/ClickHouse/pull/23312) ([Vladimir C](https://github.com/vdimir)). +* Fix possible `Block structure mismatch` error for queries with `UNION` which could possibly happen after filter-push-down optimization. Fixes [#23029](https://github.com/ClickHouse/ClickHouse/issues/23029). [#23359](https://github.com/ClickHouse/ClickHouse/pull/23359) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incompatible constant expression generation during partition pruning based on virtual columns. This fixes https://github.com/ClickHouse/ClickHouse/pull/21401#discussion_r611888913. [#23366](https://github.com/ClickHouse/ClickHouse/pull/23366) ([Amos Bird](https://github.com/amosbird)). +* `ORDER BY` with `COLLATE` was not working correctly if the column is in primary key (or is a monotonic function of it) and the setting `optimize_read_in_order` is not turned off. This closes [#22379](https://github.com/ClickHouse/ClickHouse/issues/22379). Workaround for older versions: turn the setting `optimize_read_in_order` off. [#23375](https://github.com/ClickHouse/ClickHouse/pull/23375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove support for `argMin` and `argMax` for single `Tuple` argument. The code was not memory-safe. The feature was added by mistake and it is confusing for people. These functions can be reintroduced under different names later. This fixes [#22384](https://github.com/ClickHouse/ClickHouse/issues/22384) and reverts [#17359](https://github.com/ClickHouse/ClickHouse/issues/17359). [#23393](https://github.com/ClickHouse/ClickHouse/pull/23393) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to move more conditions to `PREWHERE` as it was before version 21.1. Insufficient number of moved conditions could lead to worse performance. [#23397](https://github.com/ClickHouse/ClickHouse/pull/23397) ([Anton Popov](https://github.com/CurtizJ)). +* Kafka storage may support parquet format messages. [#23412](https://github.com/ClickHouse/ClickHouse/pull/23412) ([Chao Ma](https://github.com/godliness)). +* Kafka storage may support `arrow` and `arrowstream` format messages. [#23415](https://github.com/ClickHouse/ClickHouse/pull/23415) ([Chao Ma](https://github.com/godliness)). +* Fixed `Cannot unlink file` error on unsuccessful creation of ReplicatedMergeTree table with multidisk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Bug fix for `deltaSum` aggregate function in counter reset case ... [#23437](https://github.com/ClickHouse/ClickHouse/pull/23437) ([Russ Frank](https://github.com/rf)). +* Fix bug that does not allow cast from empty array literal, to array with dimensions greater than 1. Closes [#14476](https://github.com/ClickHouse/ClickHouse/issues/14476). [#23456](https://github.com/ClickHouse/ClickHouse/pull/23456) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix corner cases in vertical merges with `ReplacingMergeTree`. 
In rare cases they could lead to fails of merges with exceptions like `Incomplete granules are not allowed while blocks are granules size`. [#23459](https://github.com/ClickHouse/ClickHouse/pull/23459) ([Anton Popov](https://github.com/CurtizJ)). +* When modify column's default value without datatype, and this column is used as ReplacingMergeTree's parameter like column `b` in the below example, then the server will core dump: ``` CREATE TABLE alter_test (a Int32, b DateTime) ENGINE = ReplacingMergeTree(b) ORDER BY a; ALTER TABLE alter_test MODIFY COLUMN `b` DEFAULT now(); ``` the sever throw error: ``` 2021.04.22 09:48:00.685317 [ 2607 ] {} BaseDaemon: Received signal 11 2021.04.22 09:48:00.686110 [ 2705 ] {} BaseDaemon: ######################################## 2021.04.22 09:48:00.686336 [ 2705 ] {} BaseDaemon: (version 21.6.1.1, build id: 6459E84DFCF8E778546C5AD2FFE91B3AD71E1B1B) (from thread 2619) (no query) Received signal Segmentation fault (11) 2021.04.22 09:48:00.686572 [ 2705 ] {} BaseDaemon: Address: NULL pointer. Access: read. Address not mapped to object. 2021.04.22 09:48:00.686686 [ 2705 ] {} BaseDaemon: Stack trace: 0x1c2585d7 0x1c254f66 0x1bb7e403 0x1bb58923 0x1bb56a85 0x1c6840ef 0x1c691148 0x2061a05c 0x2061a8e4 0x20775a03 0x207722bd 0x20771048 0x7f6e5c25be25 0x7f6e5bd81bad 2021.04.22 09:48:02.283045 [ 2705 ] {} BaseDaemon: 4. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1449: DB::(anonymous namespace)::checkVersionColumnTypesConversion(DB::IDataType const*, DB::IDataType const*, std::__1::basic_string, std::__1::allocator >) @ 0x1c2585d7 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:03.714451 [ 2705 ] {} BaseDaemon: 5. 
/mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1582: DB::MergeTreeData::checkAlterIsPossible(DB::AlterCommands const&, std::__1::shared_ptr) const @ 0x1c254f66 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:04.692949 [ 2705 ] {} BaseDaemon: 6. /mnt/disk4/hewenting/ClickHouse/src/src/Interpreters/InterpreterAlterQuery.cpp:144: DB::InterpreterAlterQuery::execute() @ 0x1bb7e403 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server ```. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)). +* Fix `columns` function when multiple joins in select query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)). +* * Fix bug with `Join` and `WITH TOTALS`, close [#17718](https://github.com/ClickHouse/ClickHouse/issues/17718). [#23549](https://github.com/ClickHouse/ClickHouse/pull/23549) ([Vladimir C](https://github.com/vdimir)). +* Fix restart / stop command hanging. Closes [#20214](https://github.com/ClickHouse/ClickHouse/issues/20214). [#23552](https://github.com/ClickHouse/ClickHouse/pull/23552) ([filimonov](https://github.com/filimonov)). +* Fix misinterpretation of some `LIKE` expressions with escape sequences. [#23610](https://github.com/ClickHouse/ClickHouse/pull/23610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed server fault when inserting data through HTTP caused an exception. This fixes [#23512](https://github.com/ClickHouse/ClickHouse/issues/23512). [#23643](https://github.com/ClickHouse/ClickHouse/pull/23643) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added an exception in case of completely the same values in both samples in aggregate function `mannWhitneyUTest`. 
This fixes [#23646](https://github.com/ClickHouse/ClickHouse/issues/23646). [#23654](https://github.com/ClickHouse/ClickHouse/pull/23654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fixed a bug in recovery of staled `ReplicatedMergeTree` replica. Some metadata updates could be ignored by staled replica if `ALTER` query was executed during downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid possible "Cannot schedule a task" error (in case some exception had been occurred) on INSERT into Distributed. [#23744](https://github.com/ClickHouse/ClickHouse/pull/23744) ([Azat Khuzhin](https://github.com/azat)). +* Fix `heap_use_after_free` when reading from hdfs if `Values` format is used. [#23761](https://github.com/ClickHouse/ClickHouse/pull/23761) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash when `PREWHERE` and row policy filter are both in effect with empty result. [#23763](https://github.com/ClickHouse/ClickHouse/pull/23763) ([Amos Bird](https://github.com/amosbird)). +* Fixed remote JDBC bridge timeout connection issue. Closes [#9609](https://github.com/ClickHouse/ClickHouse/issues/9609). [#23771](https://github.com/ClickHouse/ClickHouse/pull/23771) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `CLEAR COLUMN` does not work when it is referenced by materialized view. Close [#23764](https://github.com/ClickHouse/ClickHouse/issues/23764). [#23781](https://github.com/ClickHouse/ClickHouse/pull/23781) ([flynn](https://github.com/ucasfl)). +* Fix error `Can't initialize pipeline with empty pipe` for queries with `GLOBAL IN/JOIN` and `use_hedged_requests`. Fixes [#23431](https://github.com/ClickHouse/ClickHouse/issues/23431). [#23805](https://github.com/ClickHouse/ClickHouse/pull/23805) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Better handling of URI's in `PocoHTTPClient`. 
Fixed bug with URLs containing `+` symbol, data with such keys could not be read previously. [#23822](https://github.com/ClickHouse/ClickHouse/pull/23822) ([Vladimir Chebotarev](https://github.com/excitoon)). +* HashedDictionary complex key update field initial load fix. Closes [#23800](https://github.com/ClickHouse/ClickHouse/issues/23800). [#23824](https://github.com/ClickHouse/ClickHouse/pull/23824) ([Maksim Kita](https://github.com/kitaisreal)). +* Better handling of HTTP errors in `PocoHTTPClient`. Response bodies of HTTP errors were being ignored earlier. [#23844](https://github.com/ClickHouse/ClickHouse/pull/23844) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix `distributed_group_by_no_merge=2` with `GROUP BY` and aggregate function wrapped into regular function (had been broken in [#23546](https://github.com/ClickHouse/ClickHouse/issues/23546)). Throw exception in case of someone trying to use `distributed_group_by_no_merge=2` with window functions. Disable `optimize_distributed_group_by_sharding_key` for queries with window functions. [#23906](https://github.com/ClickHouse/ClickHouse/pull/23906) ([Azat Khuzhin](https://github.com/azat)). +* Fix implementation of connection pool of PostgreSQL engine. Closes [#23897](https://github.com/ClickHouse/ClickHouse/issues/23897). [#23909](https://github.com/ClickHouse/ClickHouse/pull/23909) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix keys metrics accounting for CACHE() dictionary with duplicates in the source (leads to `DictCacheKeysRequestedMiss` overflows). [#23929](https://github.com/ClickHouse/ClickHouse/pull/23929) ([Azat Khuzhin](https://github.com/azat)). +* Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). 
[#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)). +* Some `ALTER PARTITION` queries might cause `Part A intersects previous part B` and `Unexpected merged part C intersecting drop range D` errors in replication queue. It's fixed. Fixes [#23296](https://github.com/ClickHouse/ClickHouse/issues/23296). [#23997](https://github.com/ClickHouse/ClickHouse/pull/23997) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix crash in MergeJoin, close [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). [#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([Vladimir C](https://github.com/vdimir)). +* now64() supports optional timezone argument ... [#24091](https://github.com/ClickHouse/ClickHouse/pull/24091) ([Vasily Nemkov](https://github.com/Enmk)). +* Fixed using const `DateTime` value vs `DateTime64` column in WHERE. ... [#24100](https://github.com/ClickHouse/ClickHouse/pull/24100) ([Vasily Nemkov](https://github.com/Enmk)). +* Bug: explain pipeline with` select xxx final `shows wrong pipeline: ``` dell123 :) explain pipeline select z from prewhere_move_select_final final;. [#24116](https://github.com/ClickHouse/ClickHouse/pull/24116) ([hexiaoting](https://github.com/hexiaoting)). +* Fix a rare bug that could lead to a partially initialized table that can serve write requests (insert/alter/so on). Now such tables will be in readonly mode. [#24122](https://github.com/ClickHouse/ClickHouse/pull/24122) ([alesapin](https://github.com/alesapin)). +* Fix race condition which could happen in RBAC under a heavy load. This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090), [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134),. [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update nested column with const condition will make server crash. 
``` CREATE TABLE test_wide_nested ( `id` Int, `info.id` Array(Int), `info.name` Array(String), `info.age` Array(Int) ) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; set mutations_sync = 1;. [#24183](https://github.com/ClickHouse/ClickHouse/pull/24183) ([hexiaoting](https://github.com/hexiaoting)). +* Fix abnormal server termination due to hdfs becoming not accessible during query execution. Closes [#24117](https://github.com/ClickHouse/ClickHouse/issues/24117). [#24191](https://github.com/ClickHouse/ClickHouse/pull/24191) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix wrong typo at StorageMemory, this bug was introduced at [#15127](https://github.com/ClickHouse/ClickHouse/issues/15127), now fixed, Closes [#24192](https://github.com/ClickHouse/ClickHouse/issues/24192). [#24193](https://github.com/ClickHouse/ClickHouse/pull/24193) ([张中南](https://github.com/plugine)). + +#### Build/Testing/Packaging Improvement +* Adding Map type tests in TestFlows. [#21087](https://github.com/ClickHouse/ClickHouse/pull/21087) ([vzakaznikov](https://github.com/vzakaznikov)). +* Testflows tests for DateTime64 Extended Range. [#22729](https://github.com/ClickHouse/ClickHouse/pull/22729) ([Andrey Zvonov](https://github.com/zvonand)). +* CMake will be failed with settings as bellow ` -DENABLE_CASSANDRA=OFF -DENABLE_AMQPCPP=ON ` ... [#22984](https://github.com/ClickHouse/ClickHouse/pull/22984) ([Ben](https://github.com/benbiti)). +* Add simple tool for benchmarking [Zoo]Keeper. [#23038](https://github.com/ClickHouse/ClickHouse/pull/23038) ([alesapin](https://github.com/alesapin)). +* Remove a source of nondeterminism from build. Now builds at different point of time will produce byte-identical binaries. Partially addressed [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#23559](https://github.com/ClickHouse/ClickHouse/pull/23559) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Avoid possible build dependency on locale and filesystem order. This allows reproducible builds. [#23600](https://github.com/ClickHouse/ClickHouse/pull/23600) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Always enable asynchronous-unwind-tables explicitly. It may fix query profiler on AArch64. [#23602](https://github.com/ClickHouse/ClickHouse/pull/23602) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Memory Sanitizer report in GRPC library. This closes [#19234](https://github.com/ClickHouse/ClickHouse/issues/19234). [#23615](https://github.com/ClickHouse/ClickHouse/pull/23615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Window functions tests in TestFlows. [#23704](https://github.com/ClickHouse/ClickHouse/pull/23704) ([vzakaznikov](https://github.com/vzakaznikov)). +* Adds support for building on Solaris-derived operating systems. [#23746](https://github.com/ClickHouse/ClickHouse/pull/23746) ([bnaecker](https://github.com/bnaecker)). +* Update librdkafka 1.6.0-RC3 to 1.6.1. [#23874](https://github.com/ClickHouse/ClickHouse/pull/23874) ([filimonov](https://github.com/filimonov)). +* Enabling running of all TestFlows modules in parallel. [#23942](https://github.com/ClickHouse/ClickHouse/pull/23942) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fixing window functions distributed tests by moving to a deterministic sharding key. [#23975](https://github.com/ClickHouse/ClickHouse/pull/23975) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add more benchmarks for hash tables, including the Swiss Table from Google (that appeared to be slower than ClickHouse hash map in our specific usage scenario). [#24111](https://github.com/ClickHouse/ClickHouse/pull/24111) ([Maksim Kita](https://github.com/kitaisreal)). +* Support building on Illumos. [#24144](https://github.com/ClickHouse/ClickHouse/pull/24144) ([bnaecker](https://github.com/bnaecker)). 
+ +#### Other +* Automated backporting now looks at the label 'release' of PRs to consider the release branch. [#23363](https://github.com/ClickHouse/ClickHouse/pull/23363) ([Ivan](https://github.com/abyss7)). +* Add test cases for arrayElement. related issue: [#22765](https://github.com/ClickHouse/ClickHouse/issues/22765). [#23484](https://github.com/ClickHouse/ClickHouse/pull/23484) ([hexiaoting](https://github.com/hexiaoting)). +* Rename uniqThetaSketch (https://github.com/ClickHouse/ClickHouse/issues/14893) to uniqTheta. [#24019](https://github.com/ClickHouse/ClickHouse/pull/24019) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Function `arrayFold` for folding over array with accumulator"'. [#23248](https://github.com/ClickHouse/ClickHouse/pull/23248) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "[RFC] Fix memory tracking with min_bytes_to_use_mmap_io"'. [#23276](https://github.com/ClickHouse/ClickHouse/pull/23276) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "add uniqThetaSketch"'. [#23334](https://github.com/ClickHouse/ClickHouse/pull/23334) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix CI build for gcc-10"'. [#23772](https://github.com/ClickHouse/ClickHouse/pull/23772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Update syntax.md'. [#24267](https://github.com/ClickHouse/ClickHouse/pull/24267) ([lulichao](https://github.com/lulichao)). + +#### New Feature #14893 + +* - Add uniqThetaSketch to support [Theta Sketch](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html) in ClickHouse. [#22609](https://github.com/ClickHouse/ClickHouse/pull/22609) ([Ping Yu](https://github.com/pingyu)). +* - Add uniqThetaSketch to support [Theta Sketch](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html) in ClickHouse. 
[#23894](https://github.com/ClickHouse/ClickHouse/pull/23894) ([Ping Yu](https://github.com/pingyu)). + diff --git a/docs/changelogs/v21.6.2.7-prestable.md b/docs/changelogs/v21.6.2.7-prestable.md new file mode 100644 index 00000000000..c5dec251786 --- /dev/null +++ b/docs/changelogs/v21.6.2.7-prestable.md @@ -0,0 +1,13 @@ +### ClickHouse release v21.6.2.7-prestable FIXME as compared to v21.6.1.6891-prestable + +#### Bug Fix +* Backported in [#24567](https://github.com/ClickHouse/ClickHouse/issues/24567): Use old modulo function version when used in partition key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#24367](https://github.com/ClickHouse/ClickHouse/issues/24367): Set `max_threads = 1` to fix mutation fail of StorageMemory. Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasfl)). +* Backported in [#24383](https://github.com/ClickHouse/ClickHouse/issues/24383): Allow empty HTTP headers. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)). +* Backported in [#24365](https://github.com/ClickHouse/ClickHouse/issues/24365): Fixed a bug in moving Materialized View from Ordinary to Atomic database (`RENAME TABLE` query). Now inner table is moved to new database together with Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24510](https://github.com/ClickHouse/ClickHouse/issues/24510): Fix drop partition with intersect fake parts. 
In rare cases there might be parts with mutation version greater than current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24542](https://github.com/ClickHouse/ClickHouse/issues/24542): Fix incorrect monotonicity of toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422) . This bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/5212 , and was exposed later by smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24556](https://github.com/ClickHouse/ClickHouse/issues/24556): Fixed the behavior when query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on server with extremely little amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#24597](https://github.com/ClickHouse/ClickHouse/issues/24597): Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#24600](https://github.com/ClickHouse/ClickHouse/issues/24600): Enable reading of subcolumns for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)). 
+ diff --git a/docs/changelogs/v21.6.3.14-stable.md b/docs/changelogs/v21.6.3.14-stable.md new file mode 100644 index 00000000000..8d2e2a03320 --- /dev/null +++ b/docs/changelogs/v21.6.3.14-stable.md @@ -0,0 +1,14 @@ +### ClickHouse release v21.6.3.14-stable FIXME as compared to v21.6.2.7-prestable + +#### Improvement +* Backported in [#24531](https://github.com/ClickHouse/ClickHouse/issues/24531): Fix Zero-Copy replication with several S3 volumes (Fixes [#22679](https://github.com/ClickHouse/ClickHouse/issues/22679)). [#22864](https://github.com/ClickHouse/ClickHouse/pull/22864) ([ianton-ru](https://github.com/ianton-ru)). +* Backported in [#24776](https://github.com/ClickHouse/ClickHouse/issues/24776): Avoid hiding errors like `Limit for rows or bytes to read exceeded` for scalar subqueries. [#24545](https://github.com/ClickHouse/ClickHouse/pull/24545) ([nvartolomei](https://github.com/nvartolomei)). + +#### Bug Fix +* Backported in [#24727](https://github.com/ClickHouse/ClickHouse/issues/24727): In "multipart/form-data" message consider the CRLF preceding a boundary as part of it. Fixes [#23905](https://github.com/ClickHouse/ClickHouse/issues/23905). [#24399](https://github.com/ClickHouse/ClickHouse/pull/24399) ([Ivan](https://github.com/abyss7)). +* Backported in [#24827](https://github.com/ClickHouse/ClickHouse/issues/24827): - Fixed the deadlock that can happen during LDAP role (re)mapping, when LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)). +* Backported in [#24936](https://github.com/ClickHouse/ClickHouse/issues/24936): - If ZooKeeper connection was lost and replica was cloned after restoring the connection, its replication queue might contain outdated entries. It's fixed. - Fixed crash when replication queue contains intersecting virtual parts. It may rarely happen if some data part was lost. Print error in log instead of terminating. 
[#24777](https://github.com/ClickHouse/ClickHouse/pull/24777) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24851](https://github.com/ClickHouse/ClickHouse/issues/24851): Fix bug when exception `Mutation was killed` can be thrown to the client on mutation wait when mutation not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)). +* Backported in [#24916](https://github.com/ClickHouse/ClickHouse/issues/24916): Fix extremely rare bug on low-memory servers which can lead to the inability to perform merges without restart. Possibly fixes [#24603](https://github.com/ClickHouse/ClickHouse/issues/24603). [#24872](https://github.com/ClickHouse/ClickHouse/pull/24872) ([alesapin](https://github.com/alesapin)). +* Backported in [#24952](https://github.com/ClickHouse/ClickHouse/issues/24952): Fix possible heap-buffer-overflow in Arrow. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v21.6.4.26-stable.md b/docs/changelogs/v21.6.4.26-stable.md new file mode 100644 index 00000000000..9bf7d0e68cb --- /dev/null +++ b/docs/changelogs/v21.6.4.26-stable.md @@ -0,0 +1,14 @@ +### ClickHouse release v21.6.4.26-stable FIXME as compared to v21.6.3.14-stable + +#### Bug Fix +* Backported in [#24969](https://github.com/ClickHouse/ClickHouse/issues/24969): Allow NULL values in postgresql protocol. Closes [#22622](https://github.com/ClickHouse/ClickHouse/issues/22622). [#24857](https://github.com/ClickHouse/ClickHouse/pull/24857) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#25186](https://github.com/ClickHouse/ClickHouse/issues/25186): Fixed bug with declaring S3 disk at root of bucket. 
Earlier, it reported an error: ``` [heather] 2021.05.10 02:11:11.932234 [ 72790 ] {2ff80b7b-ec53-41cb-ac35-19bb390e1759} executeQuery: Code: 36, e.displayText() = DB::Exception: Key name is empty in path style S3 URI: (http://172.17.0.2/bucket/) (version 21.6.1.1) (from 127.0.0.1:47994) (in query: SELECT policy_name FROM system.storage_policies), Stack trace (when copying this message, always include the lines below):. [#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#25023](https://github.com/ClickHouse/ClickHouse/issues/25023): Fix limit/offset settings for distributed queries (ignore on the remote nodes). [#24940](https://github.com/ClickHouse/ClickHouse/pull/24940) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#25049](https://github.com/ClickHouse/ClickHouse/issues/25049): Fix extremely rare error `Tagging already tagged part` in replication queue during concurrent `alter move/replace partition`. Possibly fixes [#22142](https://github.com/ClickHouse/ClickHouse/issues/22142). [#24961](https://github.com/ClickHouse/ClickHouse/pull/24961) ([alesapin](https://github.com/alesapin)). +* Backported in [#25081](https://github.com/ClickHouse/ClickHouse/issues/25081): Fix wrong result when using aggregate projection with **not empty** `GROUP BY` key to execute query with `GROUP BY` by **empty** key. [#25055](https://github.com/ClickHouse/ClickHouse/pull/25055) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#25107](https://github.com/ClickHouse/ClickHouse/issues/25107): Fix bug which allows creating tables with columns referencing themselves like `a UInt32 ALIAS a + 1` or `b UInt32 MATERIALIZED b`. Fixes [#24910](https://github.com/ClickHouse/ClickHouse/issues/24910), [#24292](https://github.com/ClickHouse/ClickHouse/issues/24292). [#25059](https://github.com/ClickHouse/ClickHouse/pull/25059) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#25106](https://github.com/ClickHouse/ClickHouse/issues/25106): Fix bug with constant maps in mapContains that lead to error `empty column was returned by function mapContains`. Closes [#25077](https://github.com/ClickHouse/ClickHouse/issues/25077). [#25080](https://github.com/ClickHouse/ClickHouse/pull/25080) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25143](https://github.com/ClickHouse/ClickHouse/issues/25143): Fix crash in query with cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25157](https://github.com/ClickHouse/ClickHouse/issues/25157): Fix possible parts loss after updating up to 21.5 in case table used `UUID` in partition key. (It is not recommended to use `UUID` in partition key). Fixes [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). [#25127](https://github.com/ClickHouse/ClickHouse/pull/25127) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25182](https://github.com/ClickHouse/ClickHouse/issues/25182): Do not use table's projection for `SELECT` with `FINAL`. It is not supported yet. [#25163](https://github.com/ClickHouse/ClickHouse/pull/25163) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v21.6.5.37-stable.md b/docs/changelogs/v21.6.5.37-stable.md new file mode 100644 index 00000000000..3d03c31c7e8 --- /dev/null +++ b/docs/changelogs/v21.6.5.37-stable.md @@ -0,0 +1,21 @@ +### ClickHouse release v21.6.5.37-stable FIXME as compared to v21.6.4.26-stable + +#### Improvement +* Backported in [#25221](https://github.com/ClickHouse/ClickHouse/issues/25221): Here will be listed all the bugs that I am gonna to fix in this PR. 
[#23518](https://github.com/ClickHouse/ClickHouse/pull/23518) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix +* Backported in [#25480](https://github.com/ClickHouse/ClickHouse/issues/25480): Fix "Missing columns" exception when joining Distributed Materialized View. [#24870](https://github.com/ClickHouse/ClickHouse/pull/24870) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#25366](https://github.com/ClickHouse/ClickHouse/issues/25366): Fix serialization of splitted nested messages in Protobuf format. This PR fixes [#24647](https://github.com/ClickHouse/ClickHouse/issues/24647). [#25000](https://github.com/ClickHouse/ClickHouse/pull/25000) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#25102](https://github.com/ClickHouse/ClickHouse/issues/25102): Distinguish KILL MUTATION for different tables (fixes unexpected `Cancelled mutating parts` error). [#25025](https://github.com/ClickHouse/ClickHouse/pull/25025) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#25213](https://github.com/ClickHouse/ClickHouse/issues/25213): Fixed an error which occurred while inserting a subset of columns using CSVWithNames format. Fixes [#25129](https://github.com/ClickHouse/ClickHouse/issues/25129). [#25169](https://github.com/ClickHouse/ClickHouse/pull/25169) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#25352](https://github.com/ClickHouse/ClickHouse/issues/25352): Fix TOCTOU error in installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25385](https://github.com/ClickHouse/ClickHouse/issues/25385): Fix incorrect behaviour and UBSan report in big integers. In previous versions `CAST(1e19 AS UInt128)` returned zero. [#25279](https://github.com/ClickHouse/ClickHouse/pull/25279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#25470](https://github.com/ClickHouse/ClickHouse/issues/25470): Fix joinGetOrNull with not-nullable columns. This fixes [#24261](https://github.com/ClickHouse/ClickHouse/issues/24261). [#25288](https://github.com/ClickHouse/ClickHouse/pull/25288) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#25360](https://github.com/ClickHouse/ClickHouse/issues/25360): Fix error `Bad cast from type DB::ColumnLowCardinality to DB::ColumnVector` for queries where `LowCardinality` argument was used for IN (this bug appeared in 21.6). Fixes [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187). [#25290](https://github.com/ClickHouse/ClickHouse/pull/25290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25356](https://github.com/ClickHouse/ClickHouse/issues/25356): Fix Logical Error Cannot sum Array/Tuple in min/maxMap. [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25436](https://github.com/ClickHouse/ClickHouse/issues/25436): Support `SimpleAggregateFunction(LowCardinality)` for `SummingMergeTree`. Fixes [#25134](https://github.com/ClickHouse/ClickHouse/issues/25134). [#25300](https://github.com/ClickHouse/ClickHouse/pull/25300) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25363](https://github.com/ClickHouse/ClickHouse/issues/25363): On ZooKeeper connection loss `ReplicatedMergeTree` table might wait for background operations to complete before trying to reconnect. It's fixed, now background operations are stopped forcefully. [#25306](https://github.com/ClickHouse/ClickHouse/pull/25306) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25388](https://github.com/ClickHouse/ClickHouse/issues/25388): Fix the possibility of non-deterministic behaviour of the `quantileDeterministic` function and similar. This closes [#20480](https://github.com/ClickHouse/ClickHouse/issues/20480). 
[#25313](https://github.com/ClickHouse/ClickHouse/pull/25313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25448](https://github.com/ClickHouse/ClickHouse/issues/25448): Fix lost `WHERE` condition in expression-push-down optimization of query plan (setting `query_plan_filter_push_down = 1` by default). Fixes [#25368](https://github.com/ClickHouse/ClickHouse/issues/25368). [#25370](https://github.com/ClickHouse/ClickHouse/pull/25370) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25407](https://github.com/ClickHouse/ClickHouse/issues/25407): Fix `REPLACE` column transformer when used in DDL by correctly quoting the formatted query. This fixes [#23925](https://github.com/ClickHouse/ClickHouse/issues/23925). [#25391](https://github.com/ClickHouse/ClickHouse/pull/25391) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v21.6.6.51-stable.md b/docs/changelogs/v21.6.6.51-stable.md new file mode 100644 index 00000000000..55f1fd46119 --- /dev/null +++ b/docs/changelogs/v21.6.6.51-stable.md @@ -0,0 +1,19 @@ +### ClickHouse release v21.6.6.51-stable FIXME as compared to v21.6.5.37-stable + +#### Bug Fix +* Backported in [#25850](https://github.com/ClickHouse/ClickHouse/issues/25850): `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. Inference of the common type for `Date` and `DateTime` also was not using the corresponding timezone. It affected the results of function `if` and array construction. Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)).
+* Backported in [#25677](https://github.com/ClickHouse/ClickHouse/issues/25677): Fixed bug in deserialization of random generator state which might cause some data types such as `AggregateFunction(groupArraySample(N), T)` to behave in a non-deterministic way. [#24538](https://github.com/ClickHouse/ClickHouse/pull/24538) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25535](https://github.com/ClickHouse/ClickHouse/issues/25535): Fixed possible error 'Cannot read from istream at offset 0' when reading a file from DiskS3. [#24885](https://github.com/ClickHouse/ClickHouse/pull/24885) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#25557](https://github.com/ClickHouse/ClickHouse/issues/25557): Fix potential crash when calculating aggregate function states by aggregation of aggregate function states of other aggregate functions (not a practical use case). See [#24523](https://github.com/ClickHouse/ClickHouse/issues/24523). [#25015](https://github.com/ClickHouse/ClickHouse/pull/25015) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#25506](https://github.com/ClickHouse/ClickHouse/issues/25506): Fix segfault when sharding_key is absent in task config for copier. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#25765](https://github.com/ClickHouse/ClickHouse/issues/25765): Fix assertion in PREWHERE with non-uint8 type, close [#19589](https://github.com/ClickHouse/ClickHouse/issues/19589). [#25484](https://github.com/ClickHouse/ClickHouse/pull/25484) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#25635](https://github.com/ClickHouse/ClickHouse/issues/25635): Fix wrong totals for query `WITH TOTALS` and `WITH FILL`. Fixes [#20872](https://github.com/ClickHouse/ClickHouse/issues/20872). [#25539](https://github.com/ClickHouse/ClickHouse/pull/25539) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#25652](https://github.com/ClickHouse/ClickHouse/issues/25652): Fix null pointer dereference in `EXPLAIN AST` without query. [#25631](https://github.com/ClickHouse/ClickHouse/pull/25631) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25717](https://github.com/ClickHouse/ClickHouse/issues/25717): `REPLACE PARTITION` might be ignored in rare cases if the source partition was empty. It's fixed. Fixes [#24869](https://github.com/ClickHouse/ClickHouse/issues/24869). [#25665](https://github.com/ClickHouse/ClickHouse/pull/25665) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25696](https://github.com/ClickHouse/ClickHouse/issues/25696): Fixed `No such file or directory` error on moving `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). [#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#25755](https://github.com/ClickHouse/ClickHouse/issues/25755): Fix data race when querying `system.clusters` while reloading the cluster configuration at the same time. [#25737](https://github.com/ClickHouse/ClickHouse/pull/25737) ([Amos Bird](https://github.com/amosbird)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Partial backport [#24061](https://github.com/ClickHouse/ClickHouse/issues/24061) to 21.6'. [#25621](https://github.com/ClickHouse/ClickHouse/pull/25621) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.6.7.57-stable.md b/docs/changelogs/v21.6.7.57-stable.md new file mode 100644 index 00000000000..5ef9026794b --- /dev/null +++ b/docs/changelogs/v21.6.7.57-stable.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.6.7.57-stable FIXME as compared to v21.6.6.51-stable + +#### Bug Fix +* Backported in [#25955](https://github.com/ClickHouse/ClickHouse/issues/25955): Fix `ALTER MODIFY COLUMN` of columns, which participates in TTL expressions. 
[#25554](https://github.com/ClickHouse/ClickHouse/pull/25554) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#25959](https://github.com/ClickHouse/ClickHouse/issues/25959): Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)). +* Backported in [#26096](https://github.com/ClickHouse/ClickHouse/issues/26096): Fix wrong thread estimation for right subquery join in some cases. Close [#24075](https://github.com/ClickHouse/ClickHouse/issues/24075). [#26052](https://github.com/ClickHouse/ClickHouse/pull/26052) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26143](https://github.com/ClickHouse/ClickHouse/issues/26143): Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.6.8.62-stable.md b/docs/changelogs/v21.6.8.62-stable.md new file mode 100644 index 00000000000..1357c762181 --- /dev/null +++ b/docs/changelogs/v21.6.8.62-stable.md @@ -0,0 +1,9 @@ +### ClickHouse release v21.6.8.62-stable FIXME as compared to v21.6.7.57-stable + +#### Bug Fix +* Backported in [#26194](https://github.com/ClickHouse/ClickHouse/issues/26194): Fix sharding_key from column w/o function for remote() (before `select * from remote('127.1', system.one, dummy)` leads to `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#26108](https://github.com/ClickHouse/ClickHouse/issues/26108): Fix rare server crash because of `abort` in ZooKeeper client. Fixes [#25813](https://github.com/ClickHouse/ClickHouse/issues/25813). 
[#26079](https://github.com/ClickHouse/ClickHouse/pull/26079) ([alesapin](https://github.com/alesapin)). +* Backported in [#26167](https://github.com/ClickHouse/ClickHouse/issues/26167): Fix `joinGet` with LowCardinality columns, close [#25993](https://github.com/ClickHouse/ClickHouse/issues/25993). [#26118](https://github.com/ClickHouse/ClickHouse/pull/26118) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26205](https://github.com/ClickHouse/ClickHouse/issues/26205): Fix potential crash if more than one `untuple` expression is used. [#26179](https://github.com/ClickHouse/ClickHouse/pull/26179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#26227](https://github.com/ClickHouse/ClickHouse/issues/26227): Remove excessive newline in `thread_name` column in `system.stack_trace` table. This fixes [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.6.9.7-stable.md b/docs/changelogs/v21.6.9.7-stable.md new file mode 100644 index 00000000000..ecac9dd75ef --- /dev/null +++ b/docs/changelogs/v21.6.9.7-stable.md @@ -0,0 +1,43 @@ +### ClickHouse release v21.6.9.7-stable FIXME as compared to v21.6.8.62-stable + +#### Improvement +* Backported in [#27129](https://github.com/ClickHouse/ClickHouse/issues/27129): If SSDDictionary is created with DDL query, it can be created only inside user_files directory. [#24466](https://github.com/ClickHouse/ClickHouse/pull/24466) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Bug Fix +* Backported in [#26857](https://github.com/ClickHouse/ClickHouse/issues/26857): ParallelFormattingOutputFormat: Use mutex to handle the join to the collector_thread (https://github.com/ClickHouse/ClickHouse/issues/26694). [#26703](https://github.com/ClickHouse/ClickHouse/pull/26703) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#26939](https://github.com/ClickHouse/ClickHouse/issues/26939): Do not remove data on ReplicatedMergeTree table shutdown to avoid creating data to metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#26985](https://github.com/ClickHouse/ClickHouse/issues/26985): Aggregate function parameters might be lost when applying some combinators causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed. Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26910](https://github.com/ClickHouse/ClickHouse/issues/26910): Fix library-bridge ids load. [#26834](https://github.com/ClickHouse/ClickHouse/pull/26834) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#26946](https://github.com/ClickHouse/ClickHouse/issues/26946): Fix error `Missing columns: 'xxx'` when `DEFAULT` column references other non materialized column without `DEFAULT` expression. Fixes [#26591](https://github.com/ClickHouse/ClickHouse/issues/26591). [#26900](https://github.com/ClickHouse/ClickHouse/pull/26900) ([alesapin](https://github.com/alesapin)). +* Backported in [#27000](https://github.com/ClickHouse/ClickHouse/issues/27000): Fix reading of custom TLDs (stops processing with lower buffer or bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27085](https://github.com/ClickHouse/ClickHouse/issues/27085): Now partition ID in queries like `ALTER TABLE ... PARTITION ID xxx` validates for correctness. 
Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)). +* Backported in [#27052](https://github.com/ClickHouse/ClickHouse/issues/27052): [RFC] Fix possible mutation stuck due to race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27159](https://github.com/ClickHouse/ClickHouse/issues/27159): Fix synchronization in GRPCServer. This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27367](https://github.com/ClickHouse/ClickHouse/issues/27367): Fix uninitialized memory in functions `multiSearch*` with empty array, close [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). [#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#27260](https://github.com/ClickHouse/ClickHouse/issues/27260): In rare cases `system.detached_parts` table might contain incorrect information for some parts, it's fixed. Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27466](https://github.com/ClickHouse/ClickHouse/issues/27466): Fixed incorrect validation of partition id for MergeTree tables that were created with old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27646](https://github.com/ClickHouse/ClickHouse/issues/27646): Fix incorrect result for query with row-level security, prewhere and LowCardinality filter. Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179).
[#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27471](https://github.com/ClickHouse/ClickHouse/issues/27471): Fix the metric `BackgroundMessageBrokerSchedulePoolTask`, which was probably mistyped. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)). +* Fixed `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache` configuration parsing. Options `allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds` were not parsed for dictionaries with non `cache` type. [#27523](https://github.com/ClickHouse/ClickHouse/pull/27523) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#27731](https://github.com/ClickHouse/ClickHouse/issues/27731): Fix crash during projection materialization when some parts contain missing columns. This fixes [#27512](https://github.com/ClickHouse/ClickHouse/issues/27512). [#27528](https://github.com/ClickHouse/ClickHouse/pull/27528) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#27978](https://github.com/ClickHouse/ClickHouse/issues/27978): Bugfix for windowFunnel's "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). [#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)). +* Backported in [#27675](https://github.com/ClickHouse/ClickHouse/issues/27675): Fix postgresql table function resulting in non-closing connections. Closes [#26088](https://github.com/ClickHouse/ClickHouse/issues/26088). [#27662](https://github.com/ClickHouse/ClickHouse/pull/27662) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#27699](https://github.com/ClickHouse/ClickHouse/issues/27699): Fix bad type cast when functions like `arrayHas` are applied to arrays of LowCardinality of Nullable of different non-numeric types like `DateTime` and `DateTime64`. In previous versions bad cast occurs. In new version it will lead to exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#27746](https://github.com/ClickHouse/ClickHouse/issues/27746): Remove duplicated source files in CMakeLists.txt in arrow-cmake. [#27736](https://github.com/ClickHouse/ClickHouse/pull/27736) ([李扬](https://github.com/taiyang-li)). +* Backported in [#27865](https://github.com/ClickHouse/ClickHouse/issues/27865): Prevent crashes for some formats when NULL (tombstone) message was coming from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)). +* Backported in [#28348](https://github.com/ClickHouse/ClickHouse/issues/28348): Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). +* Backported in [#27957](https://github.com/ClickHouse/ClickHouse/issues/27957): Fix selecting with extremes from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27952](https://github.com/ClickHouse/ClickHouse/issues/27952): Check cluster name before creating Distributed table, do not allow to create a table with incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). 
[#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28206](https://github.com/ClickHouse/ClickHouse/issues/28206): Fix cases, when read buffer fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([Filatenkov Artur](https://github.com/FArthur-cmd)). + +#### Build/Testing/Packaging Improvement +* Backported in [#28030](https://github.com/ClickHouse/ClickHouse/issues/28030): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28119](https://github.com/ClickHouse/ClickHouse/issues/28119): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#28179](https://github.com/ClickHouse/ClickHouse/issues/28179): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). +* Backported in [#28256](https://github.com/ClickHouse/ClickHouse/issues/28256): Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) triggered with `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#28264](https://github.com/ClickHouse/ClickHouse/issues/28264): Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and extremes. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#28291](https://github.com/ClickHouse/ClickHouse/issues/28291): Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v21.7.1.7283-prestable.md b/docs/changelogs/v21.7.1.7283-prestable.md new file mode 100644 index 00000000000..52de493c4ab --- /dev/null +++ b/docs/changelogs/v21.7.1.7283-prestable.md @@ -0,0 +1,178 @@ +### ClickHouse release v21.7.1.7283-prestable FIXME as compared to v21.6.1.6891-prestable + +#### Backward Incompatible Change +* Improved performance of queries with explicitly defined large sets. Added compatibility setting `legacy_column_name_of_tuple_literal`. It makes sense to set it to `true`, while doing rolling update of cluster from version lower than 21.7 to any higher version. Otherwise distributed queries with explicitly defined sets at `IN` clause may fail during update. [#25371](https://github.com/ClickHouse/ClickHouse/pull/25371) ([Anton Popov](https://github.com/CurtizJ)). +* Forward/backward incompatible change of maximum buffer size in clickhouse-keeper. Better to do it now (before production), than later. [#25421](https://github.com/ClickHouse/ClickHouse/pull/25421) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Add support for VFS over HDFS. [#11058](https://github.com/ClickHouse/ClickHouse/pull/11058) ([overshov](https://github.com/overshov)). 
+* Provides a way to restore replicated table when the data is (possibly) present, but the ZooKeeper metadata is lost. Resolves [#13458](https://github.com/ClickHouse/ClickHouse/issues/13458). [#13652](https://github.com/ClickHouse/ClickHouse/pull/13652) ([Mike Kot](https://github.com/myrrc)). +* Implement `sequenceNextNode()` function useful for `flow analysis`. [#19766](https://github.com/ClickHouse/ClickHouse/pull/19766) ([achimbab](https://github.com/achimbab)). +* Added YAML configuration support to configuration loader. This closes [#3607](https://github.com/ClickHouse/ClickHouse/issues/3607). [#21858](https://github.com/ClickHouse/ClickHouse/pull/21858) ([BoloniniD](https://github.com/BoloniniD)). +* Added `dateName` function. [#23085](https://github.com/ClickHouse/ClickHouse/pull/23085) ([Daniil Kondratyev](https://github.com/dankondr)). +* Add `quantileBFloat16` aggregate function as well as the corresponding `quantilesBFloat16` and `medianBFloat16`. It is a very simple and fast quantile estimator with relative error not more than 0.390625%. This closes [#16641](https://github.com/ClickHouse/ClickHouse/issues/16641). [#23204](https://github.com/ClickHouse/ClickHouse/pull/23204) ([Ivan Novitskiy](https://github.com/RedClusive)). +* Support `ALTER DELETE` queries for `Join` table engine. [#23260](https://github.com/ClickHouse/ClickHouse/pull/23260) ([foolchi](https://github.com/foolchi)). +* Add a new boolean setting `prefer_global_in_and_join` which defaults all IN/JOIN as GLOBAL IN/JOIN. [#23434](https://github.com/ClickHouse/ClickHouse/pull/23434) ([Amos Bird](https://github.com/amosbird)). +* Add `bitPositionsToArray` function. [#23843](https://github.com/ClickHouse/ClickHouse/pull/23843) ([kevin wan](https://github.com/MaxWk)). +* Add aggregate function `segmentLengthSum`. [#24250](https://github.com/ClickHouse/ClickHouse/pull/24250) ([flynn](https://github.com/ucasfl)). 
+* Support structs and maps in Arrow/Parquet/ORC and dictionaries in Arrow input/output formats. Present new setting `output_format_arrow_low_cardinality_as_dictionary`. [#24341](https://github.com/ClickHouse/ClickHouse/pull/24341) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `compile_expression` setting for AARCH64. [#24342](https://github.com/ClickHouse/ClickHouse/pull/24342) ([Maksim Kita](https://github.com/kitaisreal)). +* Now clickhouse-keeper supports ZooKeeper-like `digest` ACLs. [#24448](https://github.com/ClickHouse/ClickHouse/pull/24448) ([alesapin](https://github.com/alesapin)). +* Implements the `h3ToGeo` function. [#24867](https://github.com/ClickHouse/ClickHouse/pull/24867) ([Bharat Nallan](https://github.com/bharatnc)). +* Now query_log has two new columns : initial_query_start_time / initial_query_start_time_microsecond that record the starting time of a distributed query if any. [#25022](https://github.com/ClickHouse/ClickHouse/pull/25022) ([Amos Bird](https://github.com/amosbird)). +* Dictionaries added support for Array type. [#25119](https://github.com/ClickHouse/ClickHouse/pull/25119) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `toJSONString` function to serialize columns to their JSON representations. [#25164](https://github.com/ClickHouse/ClickHouse/pull/25164) ([Amos Bird](https://github.com/amosbird)). +* ClickHouse database created with MaterializeMySQL now contains all column comments from the MySQL database that materialized. [#25199](https://github.com/ClickHouse/ClickHouse/pull/25199) ([Storozhuk Kostiantyn](https://github.com/sand6255)). +* Added function `dateName`. Author [Daniil Kondratyev] (@dankondr). [#25372](https://github.com/ClickHouse/ClickHouse/pull/25372) ([Maksim Kita](https://github.com/kitaisreal)). +* Added function `bitPositionsToArray`. Closes [#23792](https://github.com/ClickHouse/ClickHouse/issues/23792). Author [Kevin Wan] (@MaxWk). 
[#25394](https://github.com/ClickHouse/ClickHouse/pull/25394) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* (remove from changelog) Integrate and test experimental compression libraries. Will be available under the flag `allow_experimental_codecs`. This closes [#16775](https://github.com/ClickHouse/ClickHouse/issues/16775). [#17847](https://github.com/ClickHouse/ClickHouse/pull/17847) ([Abi Palagashvili](https://github.com/fibersel)). +* Add exponential backoff to reschedule read attempt in case RabbitMQ queues are empty. Closes [#24340](https://github.com/ClickHouse/ClickHouse/issues/24340). [#24415](https://github.com/ClickHouse/ClickHouse/pull/24415) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Index of type bloom_filter can be used for expressions with `hasAny` function with constant arrays. This closes: [#24291](https://github.com/ClickHouse/ClickHouse/issues/24291). [#24900](https://github.com/ClickHouse/ClickHouse/pull/24900) ([Vasily Nemkov](https://github.com/Enmk)). + +#### Improvement +* Fix Zero-Copy replication with several S3 volumes (Fixes [#22679](https://github.com/ClickHouse/ClickHouse/issues/22679)). [#22864](https://github.com/ClickHouse/ClickHouse/pull/22864) ([ianton-ru](https://github.com/ianton-ru)). +* Add ability to push down LIMIT for distributed queries. [#23027](https://github.com/ClickHouse/ClickHouse/pull/23027) ([Azat Khuzhin](https://github.com/azat)). +* Respect `insert_allow_materialized_columns` (allows materialized columns) for INSERT into `Distributed` table. [#23349](https://github.com/ClickHouse/ClickHouse/pull/23349) ([Azat Khuzhin](https://github.com/azat)). +* Here will be listed all the bugs that I am gonna to fix in this PR. [#23518](https://github.com/ClickHouse/ClickHouse/pull/23518) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Display progress for File table engine in clickhouse-local and on INSERT query in clickhouse-client when data is passed to stdin. 
Closes [#18209](https://github.com/ClickHouse/ClickHouse/issues/18209). [#23656](https://github.com/ClickHouse/ClickHouse/pull/23656) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Handle column name clashes for storage join, close [#20309](https://github.com/ClickHouse/ClickHouse/issues/20309). [#23769](https://github.com/ClickHouse/ClickHouse/pull/23769) ([Vladimir C](https://github.com/vdimir)). +* Add ability to split distributed batch on failures (i.e. due to memory limits, corruptions), under `distributed_directory_monitor_split_batch_on_failure` (OFF by default). [#23864](https://github.com/ClickHouse/ClickHouse/pull/23864) ([Azat Khuzhin](https://github.com/azat)). +* Add standalone `clickhouse-keeper` symlink to the main `clickhouse` binary. Now it's possible to run coordination without the main clickhouse server. [#24059](https://github.com/ClickHouse/ClickHouse/pull/24059) ([alesapin](https://github.com/alesapin)). +* Suppress exceptions from logger code. [#24069](https://github.com/ClickHouse/ClickHouse/pull/24069) ([Azat Khuzhin](https://github.com/azat)). +* Use global settings for query to `VIEW`. Fixed the behavior when queries to `VIEW` use local settings, that leads to errors if setting on `CREATE VIEW` and `SELECT` were different. As for now, `VIEW` won't use these modified settings, but you can still pass additional settings in `SETTINGS` section of `CREATE VIEW` query. Close [#20551](https://github.com/ClickHouse/ClickHouse/issues/20551). [#24095](https://github.com/ClickHouse/ClickHouse/pull/24095) ([Vladimir C](https://github.com/vdimir)). +* Add settings (`connection_auto_close`/`connection_max_tries`/`connection_pool_size`) for MySQL storage engine. [#24146](https://github.com/ClickHouse/ClickHouse/pull/24146) ([Azat Khuzhin](https://github.com/azat)). +* Fix trailing whitespaces in FROM clause with subqueries in multiline mode, and also changes the output of the queries slightly in a more human friendly way. 
[#24151](https://github.com/ClickHouse/ClickHouse/pull/24151) ([Azat Khuzhin](https://github.com/azat)). +* Recognize IPv4 addresses like `127.0.1.1` as local. This is controversial and closes [#23504](https://github.com/ClickHouse/ClickHouse/issues/23504). Michael Filimonov will test this feature. [#24316](https://github.com/ClickHouse/ClickHouse/pull/24316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix IPv6 addresses resolving (i.e. fixes `select * from remote('[::1]', system.one)`). [#24319](https://github.com/ClickHouse/ClickHouse/pull/24319) ([Azat Khuzhin](https://github.com/azat)). +* Now query_log has two new columns : `initial_query_start_time / initial_query_start_time_microsecond` that record the starting time of a distributed query if any. [#24388](https://github.com/ClickHouse/ClickHouse/pull/24388) ([Amos Bird](https://github.com/amosbird)). +* Rewrite more columns to possible alias expressions. This may enable better optimization, such as projections. [#24405](https://github.com/ClickHouse/ClickHouse/pull/24405) ([Amos Bird](https://github.com/amosbird)). +* Added optimization, that transforms some functions to reading of subcolumns to reduce amount of read data. E.g., statement `col IS NULL` is transformed to reading of subcolumn `col.null`. Optimization can be enabled by setting `optimize_functions_to_subcolumns`. [#24406](https://github.com/ClickHouse/ClickHouse/pull/24406) ([Anton Popov](https://github.com/CurtizJ)). +* Fix a data race on Keeper shutdown. [#24412](https://github.com/ClickHouse/ClickHouse/pull/24412) ([alesapin](https://github.com/alesapin)). +* Support postgres schema for insert queries. Closes [#24149](https://github.com/ClickHouse/ClickHouse/issues/24149). [#24413](https://github.com/ClickHouse/ClickHouse/pull/24413) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If SSDDictionary is created with DDL query, it can be created only inside user_files directory. 
[#24466](https://github.com/ClickHouse/ClickHouse/pull/24466) ([Maksim Kita](https://github.com/kitaisreal)). +* Make String-to-Int parser stricter so that `toInt64('+')` will throw. [#24475](https://github.com/ClickHouse/ClickHouse/pull/24475) ([Amos Bird](https://github.com/amosbird)). +* Add merge tree setting `max_parts_to_merge_at_once` which limits the number of parts that can be merged in the background at once. Doesn't affect `OPTIMIZE FINAL` query. Fixes [#1820](https://github.com/ClickHouse/ClickHouse/issues/1820). [#24496](https://github.com/ClickHouse/ClickHouse/pull/24496) ([alesapin](https://github.com/alesapin)). +* Avoid hiding errors like `Limit for rows or bytes to read exceeded` for scalar subqueries. [#24545](https://github.com/ClickHouse/ClickHouse/pull/24545) ([nvartolomei](https://github.com/nvartolomei)). +* Add two Replicated*MergeTree settings: `max_replicated_fetches_network_bandwidth` and `max_replicated_sends_network_bandwidth` which allows to limit maximum speed of replicated fetches/sends for table. Add two server-wide settings (in `default` user profile): `max_replicated_fetches_network_bandwidth_for_server` and `max_replicated_sends_network_bandwidth_for_server` which limit maximum speed of replication for all tables. The settings are not followed perfectly accurately. Turned off by default. Fixes [#1821](https://github.com/ClickHouse/ClickHouse/issues/1821). [#24573](https://github.com/ClickHouse/ClickHouse/pull/24573) ([alesapin](https://github.com/alesapin)). +* Respect `max_distributed_connections` for `insert_distributed_sync` (otherwise for huge clusters and sync insert it may run out of `max_thread_pool_size`). [#24754](https://github.com/ClickHouse/ClickHouse/pull/24754) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a bug in `Replicated` database engine that might rarely cause some replica to skip enqueued DDL query. 
[#24805](https://github.com/ClickHouse/ClickHouse/pull/24805) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Some queries require multi-pass semantic analysis. Try reusing built sets for `IN` in this case. [#24874](https://github.com/ClickHouse/ClickHouse/pull/24874) ([Amos Bird](https://github.com/amosbird)). +* Allow `not in` operator to be used in partition pruning. [#24894](https://github.com/ClickHouse/ClickHouse/pull/24894) ([Amos Bird](https://github.com/amosbird)). +* Improved logging of S3 errors, no more double spaces in case of empty keys and buckets. [#24897](https://github.com/ClickHouse/ClickHouse/pull/24897) ([Vladimir Chebotarev](https://github.com/excitoon)). +* For distributed query, when `optimize_skip_unused_shards=1`, allow to skip shard with condition like `(sharding key) IN (one-element-tuple)`. (Tuples with many elements were supported. Tuple with single element did not work because it is parsed as literal). [#24930](https://github.com/ClickHouse/ClickHouse/pull/24930) ([Amos Bird](https://github.com/amosbird)). +* Detect linux version at runtime (for worked nested epoll, that is required for `async_socket_for_remote`/`use_hedged_requests`, otherwise remote queries may stuck). [#25067](https://github.com/ClickHouse/ClickHouse/pull/25067) ([Azat Khuzhin](https://github.com/azat)). +* Increase size of background schedule pool to 128 (`background_schedule_pool_size` setting). It allows avoiding replication queue hung on slow zookeeper connection. [#25072](https://github.com/ClickHouse/ClickHouse/pull/25072) ([alesapin](https://github.com/alesapin)). +* Fix topLevelDomain() for IDN hosts (i.e. `example.рф`), before it returns empty string for such hosts. [#25103](https://github.com/ClickHouse/ClickHouse/pull/25103) ([Azat Khuzhin](https://github.com/azat)). +* On server start, parts with incorrect partition ID would not be ever removed, but always detached. [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). 
[#25166](https://github.com/ClickHouse/ClickHouse/pull/25166) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Correct memory tracking in aggregate function `topK`. This closes [#25259](https://github.com/ClickHouse/ClickHouse/issues/25259). [#25260](https://github.com/ClickHouse/ClickHouse/pull/25260) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use separate `clickhouse-bridge` group and user for bridge processes. Set oom_score_adj so the bridges will be first subjects for OOM killer. Set maximum RSS to 1 GiB. Closes [#23861](https://github.com/ClickHouse/ClickHouse/issues/23861). [#25280](https://github.com/ClickHouse/ClickHouse/pull/25280) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update prompt in `clickhouse-client` and display a message when reconnecting. This closes [#10577](https://github.com/ClickHouse/ClickHouse/issues/10577). [#25281](https://github.com/ClickHouse/ClickHouse/pull/25281) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for function `if` with Decimal and Int types on its branches. This closes [#20549](https://github.com/ClickHouse/ClickHouse/issues/20549). This closes [#10142](https://github.com/ClickHouse/ClickHouse/issues/10142). [#25283](https://github.com/ClickHouse/ClickHouse/pull/25283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add settings `http_max_fields`, `http_max_field_name_size`, `http_max_field_value_size`. [#25296](https://github.com/ClickHouse/ClickHouse/pull/25296) ([Ivan](https://github.com/abyss7)). +* Add == operator on time conditions for `sequenceMatch` and `sequenceCount` functions. For example: `sequenceMatch('(?1)(?t==1)(?2)')(time, data = 1, data = 2)`. [#25299](https://github.com/ClickHouse/ClickHouse/pull/25299) ([Christophe Kalenzaga](https://github.com/mga-chka)). +* Support Interval for LowCardinality, close [#21730](https://github.com/ClickHouse/ClickHouse/issues/21730). 
[#25410](https://github.com/ClickHouse/ClickHouse/pull/25410) ([Vladimir C](https://github.com/vdimir)). +* Flatbuffers library updated to v.2.0.0. Improvements list https://github.com/google/flatbuffers/releases/tag/v2.0.0. [#25474](https://github.com/ClickHouse/ClickHouse/pull/25474) ([Ilya Yatsishin](https://github.com/qoega)). +* Drop replicas from dirname for internal_replication=true (allows INSERT into Distributed with cluster from any number of replicas, before only 15 replicas was supported, everything more will fail with ENAMETOOLONG while creating directory for async blocks). [#25513](https://github.com/ClickHouse/ClickHouse/pull/25513) ([Azat Khuzhin](https://github.com/azat)). +* Resolve the actual port number bound when a user requests any available port from the operating system. [#25569](https://github.com/ClickHouse/ClickHouse/pull/25569) ([bnaecker](https://github.com/bnaecker)). +* Improve startup time of Distributed engine. [#25663](https://github.com/ClickHouse/ClickHouse/pull/25663) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Fix the bug in failover behavior when Engine=Kafka was not able to start consumption if the same consumer had an empty assignment previously. Closes [#21118](https://github.com/ClickHouse/ClickHouse/issues/21118). [#21267](https://github.com/ClickHouse/ClickHouse/pull/21267) ([filimonov](https://github.com/filimonov)). +* Fix waiting of automatic dropping of empty parts. It could lead to full filling of background pool and stuck of replication. [#23315](https://github.com/ClickHouse/ClickHouse/pull/23315) ([Anton Popov](https://github.com/CurtizJ)). +* Column cardinality in join output same as at the input, close [#23351](https://github.com/ClickHouse/ClickHouse/issues/23351), close [#20315](https://github.com/ClickHouse/ClickHouse/issues/20315). [#24061](https://github.com/ClickHouse/ClickHouse/pull/24061) ([Vladimir C](https://github.com/vdimir)). 
+* Use old modulo function version when used in partition key. Closes [#23508](https://github.com/ClickHouse/ClickHouse/issues/23508). [#24157](https://github.com/ClickHouse/ClickHouse/pull/24157) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Set `max_threads = 1` to fix mutation fail of StorageMemory. Closes [#24274](https://github.com/ClickHouse/ClickHouse/issues/24274). [#24275](https://github.com/ClickHouse/ClickHouse/pull/24275) ([flynn](https://github.com/ucasfl)). +* Allow empty HTTP headers. Fixes [#23901](https://github.com/ClickHouse/ClickHouse/issues/23901). [#24285](https://github.com/ClickHouse/ClickHouse/pull/24285) ([Ivan](https://github.com/abyss7)). +* Fixed a bug in moving Materialized View from Ordinary to Atomic database (`RENAME TABLE` query). Now inner table is moved to new database together with Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix drop partition with intersect fake parts. In rare cases there might be parts with mutation version greater than current block number. [#24321](https://github.com/ClickHouse/ClickHouse/pull/24321) ([Amos Bird](https://github.com/amosbird)). +* In "multipart/form-data" message consider the CRLF preceding a boundary as part of it. Fixes [#23905](https://github.com/ClickHouse/ClickHouse/issues/23905). [#24399](https://github.com/ClickHouse/ClickHouse/pull/24399) ([Ivan](https://github.com/abyss7)). +* Fixed the deadlock that can happen during LDAP role (re)mapping, when LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)). +* Fix incorrect monotonicity of toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422). 
This bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/5212 , and was exposed later by smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)). +* In current CH version total_writes.bytes counter decreases too much during the buffer flush. It leads to counter overflow and totalBytes return something around 17.44 EB some time after the flush. This PR should fix it. ... [#24450](https://github.com/ClickHouse/ClickHouse/pull/24450) ([DimasKovas](https://github.com/DimasKovas)). +* Fixed the behavior when query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on server with extremely little amount of RAM. [#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix usage of tuples in `CREATE .. AS SELECT` queries. [#24464](https://github.com/ClickHouse/ClickHouse/pull/24464) ([Anton Popov](https://github.com/CurtizJ)). +* Enable reading of subcolumns for distributed tables. [#24472](https://github.com/ClickHouse/ClickHouse/pull/24472) ([Anton Popov](https://github.com/CurtizJ)). +* Disallow building uniqXXXXStates of other aggregation states. [#24523](https://github.com/ClickHouse/ClickHouse/pull/24523) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed bug in deserialization of random generator state which might cause some data types such as `AggregateFunction(groupArraySample(N), T)` to behave in a non-deterministic way. [#24538](https://github.com/ClickHouse/ClickHouse/pull/24538) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug which can lead to ZooKeeper client hung inside clickhouse-server. [#24721](https://github.com/ClickHouse/ClickHouse/pull/24721) ([alesapin](https://github.com/alesapin)). +* - If ZooKeeper connection was lost and replica was cloned after restoring the connection, its replication queue might contain outdated entries. 
It's fixed. - Fixed crash when replication queue contains intersecting virtual parts. It may rarely happen if some data part was lost. Print error in log instead of terminating. [#24777](https://github.com/ClickHouse/ClickHouse/pull/24777) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug when exception `Mutation was killed` can be thrown to the client on mutation wait when mutation not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)). +* Allow NULL values in postgresql protocol. Closes [#22622](https://github.com/ClickHouse/ClickHouse/issues/22622). [#24857](https://github.com/ClickHouse/ClickHouse/pull/24857) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix "Missing columns" exception when joining Distributed Materialized View. [#24870](https://github.com/ClickHouse/ClickHouse/pull/24870) ([Azat Khuzhin](https://github.com/azat)). +* Fix extremely rare bug on low-memory servers which can lead to the inability to perform merges without restart. Possibly fixes [#24603](https://github.com/ClickHouse/ClickHouse/issues/24603). [#24872](https://github.com/ClickHouse/ClickHouse/pull/24872) ([alesapin](https://github.com/alesapin)). +* Fixed possible error 'Cannot read from istream at offset 0' when reading a file from DiskS3. [#24885](https://github.com/ClickHouse/ClickHouse/pull/24885) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fixed bug with declaring S3 disk at root of bucket. Earlier, it reported an error: ``` [heather] 2021.05.10 02:11:11.932234 [ 72790 ] {2ff80b7b-ec53-41cb-ac35-19bb390e1759} executeQuery: Code: 36, e.displayText() = DB::Exception: Key name is empty in path style S3 URI: (http://172.17.0.2/bucket/) (version 21.6.1.1) (from 127.0.0.1:47994) (in query: SELECT policy_name FROM system.storage_policies), Stack trace (when copying this message, always include the lines below):. 
[#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix possible heap-buffer-overflow in Arrow. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix limit/offset settings for distributed queries (ignore on the remote nodes). [#24940](https://github.com/ClickHouse/ClickHouse/pull/24940) ([Azat Khuzhin](https://github.com/azat)). +* Fix extremely rare error `Tagging already tagged part` in replication queue during concurrent `alter move/replace partition`. Possibly fixes [#22142](https://github.com/ClickHouse/ClickHouse/issues/22142). [#24961](https://github.com/ClickHouse/ClickHouse/pull/24961) ([alesapin](https://github.com/alesapin)). +* Fix serialization of split nested messages in Protobuf format. This PR fixes [#24647](https://github.com/ClickHouse/ClickHouse/issues/24647). [#25000](https://github.com/ClickHouse/ClickHouse/pull/25000) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix potential crash when calculating aggregate function states by aggregation of aggregate function states of other aggregate functions (not a practical use case). See [#24523](https://github.com/ClickHouse/ClickHouse/issues/24523). [#25015](https://github.com/ClickHouse/ClickHouse/pull/25015) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Distinguish KILL MUTATION for different tables (fixes unexpected `Cancelled mutating parts` error). [#25025](https://github.com/ClickHouse/ClickHouse/pull/25025) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result when using aggregate projection with **not empty** `GROUP BY` key to execute query with `GROUP BY` by **empty** key. [#25055](https://github.com/ClickHouse/ClickHouse/pull/25055) ([Amos Bird](https://github.com/amosbird)). +* Fix bug which allows creating tables with columns referencing themselves like `a UInt32 ALIAS a + 1` or `b UInt32 MATERIALIZED b`. 
Fixes [#24910](https://github.com/ClickHouse/ClickHouse/issues/24910), [#24292](https://github.com/ClickHouse/ClickHouse/issues/24292). [#25059](https://github.com/ClickHouse/ClickHouse/pull/25059) ([alesapin](https://github.com/alesapin)). +* Fix bug with constant maps in mapContains that lead to error `empty column was returned by function mapContains`. Closes [#25077](https://github.com/ClickHouse/ClickHouse/issues/25077). [#25080](https://github.com/ClickHouse/ClickHouse/pull/25080) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in query with cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible parts loss after updating up to 21.5 in case table used `UUID` in partition key. (It is not recommended to use `UUID` in partition key). Fixes [#25070](https://github.com/ClickHouse/ClickHouse/issues/25070). [#25127](https://github.com/ClickHouse/ClickHouse/pull/25127) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not use table's projection for `SELECT` with `FINAL`. It is not supported yet. [#25163](https://github.com/ClickHouse/ClickHouse/pull/25163) ([Amos Bird](https://github.com/amosbird)). +* Fixed an error which occurred while inserting a subset of columns using CSVWithNames format. Fixes [#25129](https://github.com/ClickHouse/ClickHouse/issues/25129). [#25169](https://github.com/ClickHouse/ClickHouse/pull/25169) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix TOCTOU error in installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect behaviour and UBSan report in big integers. In previous versions `CAST(1e19 AS UInt128)` returned zero. 
[#25279](https://github.com/ClickHouse/ClickHouse/pull/25279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix joinGetOrNull with not-nullable columns. This fixes [#24261](https://github.com/ClickHouse/ClickHouse/issues/24261). [#25288](https://github.com/ClickHouse/ClickHouse/pull/25288) ([Amos Bird](https://github.com/amosbird)). +* Fix error `Bad cast from type DB::ColumnLowCardinality to DB::ColumnVector` for queries where `LowCardinality` argument was used for IN (this bug appeared in 21.6). Fixes [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187). [#25290](https://github.com/ClickHouse/ClickHouse/pull/25290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Logical Error Cannot sum Array/Tuple in min/maxMap. [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `SimpleAggregateFunction(LowCardinality)` for `SummingMergeTree`. Fixes [#25134](https://github.com/ClickHouse/ClickHouse/issues/25134). [#25300](https://github.com/ClickHouse/ClickHouse/pull/25300) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* On ZooKeeper connection loss `ReplicatedMergeTree` table might wait for background operations to complete before trying to reconnect. It's fixed, now background operations are stopped forcefully. [#25306](https://github.com/ClickHouse/ClickHouse/pull/25306) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix the possibility of non-deterministic behaviour of the `quantileDeterministic` function and similar. This closes [#20480](https://github.com/ClickHouse/ClickHouse/issues/20480). [#25313](https://github.com/ClickHouse/ClickHouse/pull/25313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lost `WHERE` condition in expression-push-down optimization of query plan (setting `query_plan_filter_push_down = 1` by default). Fixes [#25368](https://github.com/ClickHouse/ClickHouse/issues/25368). 
[#25370](https://github.com/ClickHouse/ClickHouse/pull/25370) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `REPLACE` column transformer when used in DDL by correctly quoting the formatted query. This fixes [#23925](https://github.com/ClickHouse/ClickHouse/issues/23925). [#25391](https://github.com/ClickHouse/ClickHouse/pull/25391) ([Amos Bird](https://github.com/amosbird)). +* Fix segfault when sharding_key is absent in task config for copier. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix excessive underscore before the names of the preprocessed configuration files. [#25431](https://github.com/ClickHouse/ClickHouse/pull/25431) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix conversion of datetime with timezone for MySQL, PostgreSQL, ODBC. Closes [#5057](https://github.com/ClickHouse/ClickHouse/issues/5057). [#25528](https://github.com/ClickHouse/ClickHouse/pull/25528) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault in `Arrow` format when using `Decimal256`. Add arrow `Decimal256` support. [#25531](https://github.com/ClickHouse/ClickHouse/pull/25531) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed case, when sometimes conversion of postgres arrays resulted in String data type, not n-dimensional array, because `attndims` works incorrectly in some cases. Closes [#24804](https://github.com/ClickHouse/ClickHouse/issues/24804). [#25538](https://github.com/ClickHouse/ClickHouse/pull/25538) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix wrong totals for query `WITH TOTALS` and `WITH FILL`. Fixes [#20872](https://github.com/ClickHouse/ClickHouse/issues/20872). [#25539](https://github.com/ClickHouse/ClickHouse/pull/25539) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Key expression contains comparison between inconvertible types` for queries with `ARRAY JOIN` in case if array is used in primary key. 
Fixes [#8247](https://github.com/ClickHouse/ClickHouse/issues/8247). [#25546](https://github.com/ClickHouse/ClickHouse/pull/25546) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug which can lead to intersecting parts after merges with TTL: `Part all_40_40_0 is covered by all_40_40_1 but should be merged into all_40_41_1. This shouldn't happen often.`. [#25549](https://github.com/ClickHouse/ClickHouse/pull/25549) ([alesapin](https://github.com/alesapin)). +* Fix restore S3 table. [#25601](https://github.com/ClickHouse/ClickHouse/pull/25601) ([ianton-ru](https://github.com/ianton-ru)). +* Fix null pointer dereference in `EXPLAIN AST` without query. [#25631](https://github.com/ClickHouse/ClickHouse/pull/25631) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `REPLACE PARTITION` might be ignored in rare cases if the source partition was empty. It's fixed. Fixes [#24869](https://github.com/ClickHouse/ClickHouse/issues/24869). [#25665](https://github.com/ClickHouse/ClickHouse/pull/25665) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed `No such file or directory` error on moving `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). [#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix mysql select user() return empty. Fixes [#25683](https://github.com/ClickHouse/ClickHouse/issues/25683). [#25697](https://github.com/ClickHouse/ClickHouse/pull/25697) ([sundyli](https://github.com/sundy-li)). +* Fix data race when querying `system.clusters` while reloading the cluster configuration at the same time. [#25737](https://github.com/ClickHouse/ClickHouse/pull/25737) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Ubuntu 20.04 is now used to run integration tests, docker-compose version used to run integration tests is updated to 1.28.2. 
Environment variables now take effect on docker-compose. Rework test_dictionaries_all_layouts_separate_sources to allow parallel run. [#20393](https://github.com/ClickHouse/ClickHouse/pull/20393) ([Ilya Yatsishin](https://github.com/qoega)). +* - Testing for big ints using the following functions: * Arithmetic * Array, tuple, and map * Bit * Comparison * Conversion * Logical * Mathematical * Null * Rounding - Creating a table with columns that use the data types. [#24350](https://github.com/ClickHouse/ClickHouse/pull/24350) ([MyroTk](https://github.com/MyroTk)). +* Add libfuzzer tests for YAMLParser class. [#24480](https://github.com/ClickHouse/ClickHouse/pull/24480) ([BoloniniD](https://github.com/BoloniniD)). +* Adding support to save clickhouse server logs in TestFlows check. [#24504](https://github.com/ClickHouse/ClickHouse/pull/24504) ([vzakaznikov](https://github.com/vzakaznikov)). +* Integration tests configuration has special treatment for dictionaries. Removed remaining dictionaries manual setup. [#24728](https://github.com/ClickHouse/ClickHouse/pull/24728) ([Ilya Yatsishin](https://github.com/qoega)). +* Add integration test cases to cover JDBC bridge. [#25047](https://github.com/ClickHouse/ClickHouse/pull/25047) ([Zhichun Wu](https://github.com/zhicwu)). +* Disabling extended precision data types TestFlows tests. [#25125](https://github.com/ClickHouse/ClickHouse/pull/25125) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fix using Yandex dockerhub registries for TestFlows. [#25133](https://github.com/ClickHouse/ClickHouse/pull/25133) ([vzakaznikov](https://github.com/vzakaznikov)). +* Adding `leadInFrame` and `lagInFrame` window functions TestFlows tests. [#25144](https://github.com/ClickHouse/ClickHouse/pull/25144) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enable build with s3 module in osx [#25217](https://github.com/ClickHouse/ClickHouse/issues/25217). 
[#25218](https://github.com/ClickHouse/ClickHouse/pull/25218) ([kevin wan](https://github.com/MaxWk)). +* - Added rounding to mathematical and arithmetic function tests for consistent snapshot comparison. - Cleaned up test names so they're more uniform. [#25297](https://github.com/ClickHouse/ClickHouse/pull/25297) ([MyroTk](https://github.com/MyroTk)). +* Increase LDAP verification cooldown performance tests timeout to 600 sec. [#25374](https://github.com/ClickHouse/ClickHouse/pull/25374) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enabling TestFlows RBAC tests. [#25498](https://github.com/ClickHouse/ClickHouse/pull/25498) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add CI check for darwin-aarch64 cross-compilation. [#25560](https://github.com/ClickHouse/ClickHouse/pull/25560) ([Ivan](https://github.com/abyss7)). +* Changed CSS theme to dark for better code highlighting. [#25682](https://github.com/ClickHouse/ClickHouse/pull/25682) ([Mike Kot](https://github.com/myrrc)). + +#### Other +* Introduce ASTTableIdentifier into the code. [#16401](https://github.com/ClickHouse/ClickHouse/pull/16401) ([Ivan](https://github.com/abyss7)). +* Use std::filesystem instead of Poco::File. [#23657](https://github.com/ClickHouse/ClickHouse/pull/23657) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix init script so it does not print 'usage' message for each 'status' command run. [#25046](https://github.com/ClickHouse/ClickHouse/pull/25046) ([Denis Korenevskiy](https://github.com/DenKoren)). +* Fix cron.d task so it does not spam with email messages about current service status. [#25050](https://github.com/ClickHouse/ClickHouse/pull/25050) ([Denis Korenevskiy](https://github.com/DenKoren)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Pass Settings to aggregate function creator"'. [#24524](https://github.com/ClickHouse/ClickHouse/pull/24524) ([Vladimir C](https://github.com/vdimir)). +* NO CL ENTRY: 'Revert "Add initial_query_start_time to query log"'.
[#25021](https://github.com/ClickHouse/ClickHouse/pull/25021) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add run-id option to integration tests"'. [#25526](https://github.com/ClickHouse/ClickHouse/pull/25526) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Implement h3ToGeo function"'. [#25593](https://github.com/ClickHouse/ClickHouse/pull/25593) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Testing Improvement + +* * Add join related options to stress tests. [#25200](https://github.com/ClickHouse/ClickHouse/pull/25200) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.7.10.4-stable.md b/docs/changelogs/v21.7.10.4-stable.md new file mode 100644 index 00000000000..9056da8ac89 --- /dev/null +++ b/docs/changelogs/v21.7.10.4-stable.md @@ -0,0 +1,19 @@ +### ClickHouse release v21.7.10.4-stable FIXME as compared to v21.7.9.7-stable + +#### Improvement +* Backported in [#28898](https://github.com/ClickHouse/ClickHouse/issues/28898): Use real tmp file instead of predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix +* Backported in [#27925](https://github.com/ClickHouse/ClickHouse/issues/27925): Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28752](https://github.com/ClickHouse/ClickHouse/issues/28752): Fix transformation of disjunctions chain to `IN` (controlled by settings `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)). 
+ +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28509](https://github.com/ClickHouse/ClickHouse/issues/28509): Fixed possible ZooKeeper watches leak on background processing of distributed DDL queue. Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28570](https://github.com/ClickHouse/ClickHouse/issues/28570): Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). +* Backported in [#28598](https://github.com/ClickHouse/ClickHouse/issues/28598): Fix `There is no subcolumn` error, while select from tables, which have `Nested` columns and scalar columns with dot in name and the same prefix as `Nested` (e.g. `n.id UInt32, n.arr1 Array(UInt64), n.arr2 Array(UInt64)`). [#28531](https://github.com/ClickHouse/ClickHouse/pull/28531) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28742](https://github.com/ClickHouse/ClickHouse/issues/28742): Fix the coredump in the creation of distributed tables, when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)). +* Backported in [#28788](https://github.com/ClickHouse/ClickHouse/issues/28788): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). +* Backported in [#28947](https://github.com/ClickHouse/ClickHouse/issues/28947): Fix reading of subcolumns from compact parts. 
[#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28931](https://github.com/ClickHouse/ClickHouse/issues/28931): Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.7.11.3-stable.md b/docs/changelogs/v21.7.11.3-stable.md new file mode 100644 index 00000000000..66672204713 --- /dev/null +++ b/docs/changelogs/v21.7.11.3-stable.md @@ -0,0 +1,7 @@ +### ClickHouse release v21.7.11.3-stable FIXME as compared to v21.7.10.4-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29024](https://github.com/ClickHouse/ClickHouse/issues/29024): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in single threads since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29195](https://github.com/ClickHouse/ClickHouse/issues/29195): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v21.7.2.7-stable.md b/docs/changelogs/v21.7.2.7-stable.md new file mode 100644 index 00000000000..a7af0fed667 --- /dev/null +++ b/docs/changelogs/v21.7.2.7-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v21.7.2.7-stable FIXME as compared to v21.7.1.7283-prestable + +#### Improvement +* Backported in [#25881](https://github.com/ClickHouse/ClickHouse/issues/25881): Allow to start clickhouse-client with unreadable working directory. 
[#25817](https://github.com/ClickHouse/ClickHouse/pull/25817) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix +* Backported in [#25833](https://github.com/ClickHouse/ClickHouse/issues/25833): `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. Inference of the common type for `Date` and `DateTime` also was not using the corresponding timezone. It affected the results of function `if` and array construction. Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#25766](https://github.com/ClickHouse/ClickHouse/issues/25766): Fix assertion in PREWHERE with non-uint8 type, close [#19589](https://github.com/ClickHouse/ClickHouse/issues/19589). [#25484](https://github.com/ClickHouse/ClickHouse/pull/25484) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#25954](https://github.com/ClickHouse/ClickHouse/issues/25954): Fix `ALTER MODIFY COLUMN` of columns, which participates in TTL expressions. [#25554](https://github.com/ClickHouse/ClickHouse/pull/25554) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#25871](https://github.com/ClickHouse/ClickHouse/issues/25871): Fix rare bug with `DROP PART` query for `ReplicatedMergeTree` tables which can lead to error message `Unexpected merged part intersecting drop range`. [#25783](https://github.com/ClickHouse/ClickHouse/pull/25783) ([alesapin](https://github.com/alesapin)). +* Backported in [#25886](https://github.com/ClickHouse/ClickHouse/issues/25886): Fix ARM exception handling with non default page size. Fixes [#25512](https://github.com/ClickHouse/ClickHouse/issues/25512). Fixes [#25044](https://github.com/ClickHouse/ClickHouse/issues/25044). Fixes [#24901](https://github.com/ClickHouse/ClickHouse/issues/24901). 
Fixes [#23183](https://github.com/ClickHouse/ClickHouse/issues/23183). Fixes [#20221](https://github.com/ClickHouse/ClickHouse/issues/20221). Fixes [#19703](https://github.com/ClickHouse/ClickHouse/issues/19703). Fixes [#19028](https://github.com/ClickHouse/ClickHouse/issues/19028). Fixes [#18391](https://github.com/ClickHouse/ClickHouse/issues/18391). Fixes [#18121](https://github.com/ClickHouse/ClickHouse/issues/18121). Fixes [#17994](https://github.com/ClickHouse/ClickHouse/issues/17994). Fixes [#12483](https://github.com/ClickHouse/ClickHouse/issues/12483). [#25854](https://github.com/ClickHouse/ClickHouse/pull/25854) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#25957](https://github.com/ClickHouse/ClickHouse/issues/25957): Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)). +* Backported in [#25932](https://github.com/ClickHouse/ClickHouse/issues/25932): Fix crash on call dictGet() with bad arguments. [#25913](https://github.com/ClickHouse/ClickHouse/pull/25913) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#25981](https://github.com/ClickHouse/ClickHouse/issues/25981): Fix possible deadlock during query profiler stack unwinding. Fixes [#25968](https://github.com/ClickHouse/ClickHouse/issues/25968). [#25970](https://github.com/ClickHouse/ClickHouse/pull/25970) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#26010](https://github.com/ClickHouse/ClickHouse/issues/26010): Fix formatting of type `Map` with integer keys to `JSON`. [#25982](https://github.com/ClickHouse/ClickHouse/pull/25982) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#26097](https://github.com/ClickHouse/ClickHouse/issues/26097): Fix wrong thread estimation for right subquery join in some cases. 
Close [#24075](https://github.com/ClickHouse/ClickHouse/issues/24075). [#26052](https://github.com/ClickHouse/ClickHouse/pull/26052) ([Vladimir C](https://github.com/vdimir)). + diff --git a/docs/changelogs/v21.7.3.14-stable.md b/docs/changelogs/v21.7.3.14-stable.md new file mode 100644 index 00000000000..d24b7bcbf39 --- /dev/null +++ b/docs/changelogs/v21.7.3.14-stable.md @@ -0,0 +1,11 @@ +### ClickHouse release v21.7.3.14-stable FIXME as compared to v21.7.2.7-stable + +#### Bug Fix +* Backported in [#26191](https://github.com/ClickHouse/ClickHouse/issues/26191): Fix sharding_key from column w/o function for remote() (before `select * from remote('127.1', system.one, dummy)` leads to `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#26109](https://github.com/ClickHouse/ClickHouse/issues/26109): Fix rare server crash because of `abort` in ZooKeeper client. Fixes [#25813](https://github.com/ClickHouse/ClickHouse/issues/25813). [#26079](https://github.com/ClickHouse/ClickHouse/pull/26079) ([alesapin](https://github.com/alesapin)). +* Backported in [#26142](https://github.com/ClickHouse/ClickHouse/issues/26142): Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#26170](https://github.com/ClickHouse/ClickHouse/issues/26170): Fix `joinGet` with LowCardinality columns, close [#25993](https://github.com/ClickHouse/ClickHouse/issues/25993). [#26118](https://github.com/ClickHouse/ClickHouse/pull/26118) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26206](https://github.com/ClickHouse/ClickHouse/issues/26206): Fix potential crash if more than one `untuple` expression is used.
[#26179](https://github.com/ClickHouse/ClickHouse/pull/26179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#26229](https://github.com/ClickHouse/ClickHouse/issues/26229): Remove excessive newline in `thread_name` column in `system.stack_trace` table. This fixes [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix throwing exception when iterate over non existing remote directory. [#26296](https://github.com/ClickHouse/ClickHouse/pull/26296) ([ianton-ru](https://github.com/ianton-ru)). + diff --git a/docs/changelogs/v21.7.4.18-stable.md b/docs/changelogs/v21.7.4.18-stable.md new file mode 100644 index 00000000000..7bc08e2a0e3 --- /dev/null +++ b/docs/changelogs/v21.7.4.18-stable.md @@ -0,0 +1,10 @@ +### ClickHouse release v21.7.4.18-stable FIXME as compared to v21.7.3.14-stable + +#### Bug Fix +* Backported in [#26297](https://github.com/ClickHouse/ClickHouse/issues/26297): Fixed incorrect `sequence_id` in MySQL protocol packets that ClickHouse sends on exception during query execution. It might cause MySQL client to reset connection to ClickHouse server. Fixes [#21184](https://github.com/ClickHouse/ClickHouse/issues/21184). [#26051](https://github.com/ClickHouse/ClickHouse/pull/26051) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26356](https://github.com/ClickHouse/ClickHouse/issues/26356): Fix logical error on join with totals, close [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). [#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26359](https://github.com/ClickHouse/ClickHouse/issues/26359): Fixed rare bug in lost replica recovery that may cause replicas to diverge. 
[#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26419](https://github.com/ClickHouse/ClickHouse/issues/26419): Fix possible crash when login as dropped user. This PR fixes [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#26413](https://github.com/ClickHouse/ClickHouse/issues/26413): Fix infinite non joined block stream in `partial_merge_join` close [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26447](https://github.com/ClickHouse/ClickHouse/issues/26447): Fix some fuzzed msan crash. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). [#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v21.7.5.29-stable.md b/docs/changelogs/v21.7.5.29-stable.md new file mode 100644 index 00000000000..3f24e3eded9 --- /dev/null +++ b/docs/changelogs/v21.7.5.29-stable.md @@ -0,0 +1,16 @@ +### ClickHouse release v21.7.5.29-stable FIXME as compared to v21.7.4.18-stable + +#### Performance Improvement +* Backported in [#26526](https://github.com/ClickHouse/ClickHouse/issues/26526): Improve latency of short queries, that require reading from tables with large number of columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#26787](https://github.com/ClickHouse/ClickHouse/issues/26787): Fix zstd decompression in case there are escape sequences at the end of internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). 
[#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#26487](https://github.com/ClickHouse/ClickHouse/issues/26487): Fix broken name resolution after rewriting column aliases. This fixes [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#26615](https://github.com/ClickHouse/ClickHouse/issues/26615): Fix issues with `CREATE DICTIONARY` query if dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#26606](https://github.com/ClickHouse/ClickHouse/issues/26606): Fix crash in rabbitmq shutdown in case rabbitmq setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#26612](https://github.com/ClickHouse/ClickHouse/issues/26612): Update `chown` cmd check in clickhouse-server docker entrypoint. It fixes the bug that cluster pod restart failed (or timeout) on kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)). +* Backported in [#26647](https://github.com/ClickHouse/ClickHouse/issues/26647): Fix incorrect function names of groupBitmapAnd/Or/Xor. This fixes. [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#26704](https://github.com/ClickHouse/ClickHouse/issues/26704): Fix potential nullptr dereference in window functions. This fixes [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). 
[#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#26772](https://github.com/ClickHouse/ClickHouse/issues/26772): Sometimes SET ROLE could work incorrectly, this PR fixes that. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#26907](https://github.com/ClickHouse/ClickHouse/issues/26907): Fix library-bridge ids load. [#26834](https://github.com/ClickHouse/ClickHouse/pull/26834) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v21.7.6.39-stable.md b/docs/changelogs/v21.7.6.39-stable.md new file mode 100644 index 00000000000..a7913aca193 --- /dev/null +++ b/docs/changelogs/v21.7.6.39-stable.md @@ -0,0 +1,14 @@ +### ClickHouse release v21.7.6.39-stable FIXME as compared to v21.7.5.29-stable + +#### Bug Fix +* Backported in [#26854](https://github.com/ClickHouse/ClickHouse/issues/26854): ParallelFormattingOutputFormat: Use mutex to handle the join to the collector_thread (https://github.com/ClickHouse/ClickHouse/issues/26694). [#26703](https://github.com/ClickHouse/ClickHouse/pull/26703) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#26938](https://github.com/ClickHouse/ClickHouse/issues/26938): Do not remove data on ReplicatedMergeTree table shutdown to avoid creating data to metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#26986](https://github.com/ClickHouse/ClickHouse/issues/26986): Aggregate function parameters might be lost when applying some combinators causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed. 
Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26944](https://github.com/ClickHouse/ClickHouse/issues/26944): Fix error `Missing columns: 'xxx'` when `DEFAULT` column references other non materialized column without `DEFAULT` expression. Fixes [#26591](https://github.com/ClickHouse/ClickHouse/issues/26591). [#26900](https://github.com/ClickHouse/ClickHouse/pull/26900) ([alesapin](https://github.com/alesapin)). +* Backported in [#26999](https://github.com/ClickHouse/ClickHouse/issues/26999): Fix reading of custom TLDs (stops processing with lower buffer or bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27029](https://github.com/ClickHouse/ClickHouse/issues/27029): Now partition ID in queries like `ALTER TABLE ... PARTITION ID xxx` validates for correctness. Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)). +* Backported in [#27050](https://github.com/ClickHouse/ClickHouse/issues/27050): [RFC] Fix possible mutation stack due to race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27105](https://github.com/ClickHouse/ClickHouse/issues/27105): Fixed `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache` configuration parsing. Options `allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds` were not parsed for dictionaries with non `cache` type. [#27032](https://github.com/ClickHouse/ClickHouse/pull/27032) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#27156](https://github.com/ClickHouse/ClickHouse/issues/27156): Fix synchronization in GRPCServer This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27261](https://github.com/ClickHouse/ClickHouse/issues/27261): In rare cases `system.detached_parts` table might contain incorrect information for some parts, it's fixed. Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v21.7.7.47-stable.md b/docs/changelogs/v21.7.7.47-stable.md new file mode 100644 index 00000000000..f81abca2600 --- /dev/null +++ b/docs/changelogs/v21.7.7.47-stable.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.7.7.47-stable FIXME as compared to v21.7.6.39-stable + +#### Bug Fix +* Backported in [#27364](https://github.com/ClickHouse/ClickHouse/issues/27364): - Fix uninitialized memory in functions `multiSearch*` with empty array, close [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). [#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#27412](https://github.com/ClickHouse/ClickHouse/issues/27412): Fix `distributed_group_by_no_merge=2`+`distributed_push_down_limit=1` or `optimize_distributed_group_by_sharding_key=1` with `LIMIT BY` and `LIMIT OFFSET`. [#27249](https://github.com/ClickHouse/ClickHouse/pull/27249) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27418](https://github.com/ClickHouse/ClickHouse/issues/27418): Fix `Cannot find column` error for queries with sampling. Was introduced in [#24574](https://github.com/ClickHouse/ClickHouse/issues/24574). Fixes [#26522](https://github.com/ClickHouse/ClickHouse/issues/26522). 
[#27301](https://github.com/ClickHouse/ClickHouse/pull/27301) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27416](https://github.com/ClickHouse/ClickHouse/issues/27416): Fixed incorrect validation of partition id for MergeTree tables that created with old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v21.7.8.58-stable.md b/docs/changelogs/v21.7.8.58-stable.md new file mode 100644 index 00000000000..aea1ae083f0 --- /dev/null +++ b/docs/changelogs/v21.7.8.58-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.7.8.58-stable FIXME as compared to v21.7.7.47-stable + +#### Bug Fix +* Backported in [#27506](https://github.com/ClickHouse/ClickHouse/issues/27506): Fix errors like `Expected ColumnLowCardinality, gotUInt8` or `Bad cast from type DB::ColumnVector to DB::ColumnLowCardinality` for some queries with `LowCardinality` in `PREWHERE`. Fixes [#23515](https://github.com/ClickHouse/ClickHouse/issues/23515). [#27298](https://github.com/ClickHouse/ClickHouse/pull/27298) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27644](https://github.com/ClickHouse/ClickHouse/issues/27644): Fix incorrect result for query with row-level security, prewhere and LowCardinality filter. Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179). [#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27474](https://github.com/ClickHouse/ClickHouse/issues/27474): fix metric BackgroundMessageBrokerSchedulePoolTask, maybe mistyped。. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)). +* Backported in [#27649](https://github.com/ClickHouse/ClickHouse/issues/27649): Fix crash during projection materialization when some parts contain missing columns. 
This fixes [#27512](https://github.com/ClickHouse/ClickHouse/issues/27512). [#27528](https://github.com/ClickHouse/ClickHouse/pull/27528) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#27778](https://github.com/ClickHouse/ClickHouse/issues/27778): - Fix bug with aliased column in `Distributed` table. [#27652](https://github.com/ClickHouse/ClickHouse/pull/27652) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#27674](https://github.com/ClickHouse/ClickHouse/issues/27674): Fix postgresql table function resulting in non-closing connections. Closes [#26088](https://github.com/ClickHouse/ClickHouse/issues/26088). [#27662](https://github.com/ClickHouse/ClickHouse/pull/27662) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#27697](https://github.com/ClickHouse/ClickHouse/issues/27697): Fix bad type cast when functions like `arrayHas` are applied to arrays of LowCardinality of Nullable of different non-numeric types like `DateTime` and `DateTime64`. In previous versions bad cast occurs. In new version it will lead to exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#27748](https://github.com/ClickHouse/ClickHouse/issues/27748): Remove duplicated source files in CMakeLists.txt in arrow-cmake. [#27736](https://github.com/ClickHouse/ClickHouse/pull/27736) ([李扬](https://github.com/taiyang-li)). + diff --git a/docs/changelogs/v21.7.9.7-stable.md b/docs/changelogs/v21.7.9.7-stable.md new file mode 100644 index 00000000000..0d1a2a521e7 --- /dev/null +++ b/docs/changelogs/v21.7.9.7-stable.md @@ -0,0 +1,27 @@ +### ClickHouse release v21.7.9.7-stable FIXME as compared to v21.7.8.58-stable + +#### Improvement +* Backported in [#27892](https://github.com/ClickHouse/ClickHouse/issues/27892): Allow symlinks for library dictionary path.
[#27815](https://github.com/ClickHouse/ClickHouse/pull/27815) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#28155](https://github.com/ClickHouse/ClickHouse/issues/28155): Use Multipart copy upload for large S3 objects. [#27858](https://github.com/ClickHouse/ClickHouse/pull/27858) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix +* Backported in [#27976](https://github.com/ClickHouse/ClickHouse/issues/27976): Bugfix for windowFunnel's "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). [#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)). +* Backported in [#27780](https://github.com/ClickHouse/ClickHouse/issues/27780): Fix column filtering with union distinct in subquery. Closes [#27578](https://github.com/ClickHouse/ClickHouse/issues/27578). [#27689](https://github.com/ClickHouse/ClickHouse/pull/27689) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#27866](https://github.com/ClickHouse/ClickHouse/issues/27866): Prevent crashes for some formats when NULL (tombstone) message was coming from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)). +* Backported in [#28347](https://github.com/ClickHouse/ClickHouse/issues/28347): Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). +* Backported in [#27955](https://github.com/ClickHouse/ClickHouse/issues/27955): Fix selecting with extremes from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#27951](https://github.com/ClickHouse/ClickHouse/issues/27951): Check cluster name before creating Distributed table, do not allow to create a table with incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). [#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28208](https://github.com/ClickHouse/ClickHouse/issues/28208): Fix cases, when read buffer fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([Filatenkov Artur](https://github.com/FArthur-cmd)). + +#### Build/Testing/Packaging Improvement +* Backported in [#28032](https://github.com/ClickHouse/ClickHouse/issues/28032): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28116](https://github.com/ClickHouse/ClickHouse/issues/28116): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#28183](https://github.com/ClickHouse/ClickHouse/issues/28183): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). 
+* Backported in [#28255](https://github.com/ClickHouse/ClickHouse/issues/28255): Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) triggered with `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)). +* Backported in [#28265](https://github.com/ClickHouse/ClickHouse/issues/28265): Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and extremes. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#28288](https://github.com/ClickHouse/ClickHouse/issues/28288): Fix reading of custom TLD w/o new line at EOF. [#28213](https://github.com/ClickHouse/ClickHouse/pull/28213) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#28295](https://github.com/ClickHouse/ClickHouse/issues/28295): Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v21.8.1.7409-prestable.md b/docs/changelogs/v21.8.1.7409-prestable.md new file mode 100644 index 00000000000..e703d227603 --- /dev/null +++ b/docs/changelogs/v21.8.1.7409-prestable.md @@ -0,0 +1,88 @@ +### ClickHouse release v21.8.1.7409-prestable FIXME as compared to v21.7.1.7283-prestable + +#### Backward Incompatible Change +* - Backward Incompatible Change:. [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934) ([hexiaoting](https://github.com/hexiaoting)). + +#### New Feature +* Add an ability to reset custom setting to default and remove it from table's metadata. This will allow to rollback the change without knowing the system/config's default. Closes [#14449](https://github.com/ClickHouse/ClickHouse/issues/14449). 
[#17769](https://github.com/ClickHouse/ClickHouse/pull/17769) ([xjewer](https://github.com/xjewer)). +* Add MaterializedPostgreSQL table engine and database engine. Database engine allows to replicate a whole database or any subset of database tables. [#20470](https://github.com/ClickHouse/ClickHouse/pull/20470) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Adding support for a part of SQLJSON standard. [#24148](https://github.com/ClickHouse/ClickHouse/pull/24148) ([l1tsolaiki](https://github.com/l1tsolaiki)). +* Collect common system metrics (in `system.asynchronous_metrics` and `system.asynchronous_metric_log`) about CPU usage, disk usage, memory usage, IO, network, files, load average, CPU frequencies, thermal sensors, EDAC counters, system uptime; also added metrics about the scheduling jitter and the time spent collecting the metrics. It works like `atop` in ClickHouse and allows to get monitoring data even if you have no additional tools installed. This closes [#9430](https://github.com/ClickHouse/ClickHouse/issues/9430). [#24416](https://github.com/ClickHouse/ClickHouse/pull/24416) ([Yegor Levankov](https://github.com/elevankoff)). +* Add support `DISTINCT ON (columns)` expression, close [#25404](https://github.com/ClickHouse/ClickHouse/issues/25404). [#25589](https://github.com/ClickHouse/ClickHouse/pull/25589) ([Zijie Lu](https://github.com/TszKitLo40)). +* * Support Map type in `mapAdd` and `mapSubtract` functions * Support (U)Int128, U(Int256) types in `mapAdd` and `mapSubtract` functions. [#25596](https://github.com/ClickHouse/ClickHouse/pull/25596) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Add bin/unbin functions support. [#25609](https://github.com/ClickHouse/ClickHouse/pull/25609) ([zhaoyu](https://github.com/zxc111)). +* Introduce `system.data_skipping_indices` table containing information about existing data skipping indices. Closes [#7659](https://github.com/ClickHouse/ClickHouse/issues/7659). 
[#25693](https://github.com/ClickHouse/ClickHouse/pull/25693) ([Dmitry Novik](https://github.com/novikd)). +* in addition to https://github.com/ClickHouse/ClickHouse/pull/12073 add the FIRST keyword to the ADD INDEX command to be able to add index in the beginning of the indices list. [#25904](https://github.com/ClickHouse/ClickHouse/pull/25904) ([xjewer](https://github.com/xjewer)). +* Render pipelines as graphs in Web UI if `EXPLAIN PIPELINE graph = 1` query is given. [#26067](https://github.com/ClickHouse/ClickHouse/pull/26067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add new functions `leftPad()`, `rightPad()`, `leftPadUTF8()`, `rightPadUTF8()`. [#26075](https://github.com/ClickHouse/ClickHouse/pull/26075) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Performance Improvement +* Added option to compile aggregate functions if `compile_aggregate_expressions` settings is on. [#24789](https://github.com/ClickHouse/ClickHouse/pull/24789) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Improvement +* `Database` argument for `StorageMerge` support regular expression. This closes #776. [#25064](https://github.com/ClickHouse/ClickHouse/pull/25064) ([flynn](https://github.com/ucasfl)). +* Allow extract non-string element as string using JSONExtract. This is for [#25414](https://github.com/ClickHouse/ClickHouse/issues/25414). [#25452](https://github.com/ClickHouse/ClickHouse/pull/25452) ([Amos Bird](https://github.com/amosbird)). +* Support for dynamic reloading of config to change number of threads in pool for background jobs execution (merges, mutations, fetches). [#25548](https://github.com/ClickHouse/ClickHouse/pull/25548) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support TRUNCATE TABLE for StorageS3 and StorageHDFS. Closes [#25530](https://github.com/ClickHouse/ClickHouse/issues/25530). [#25550](https://github.com/ClickHouse/ClickHouse/pull/25550) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Make `NetworkReceiveElapsedMicroseconds` metric to correctly include the time spent waiting for data from the client to INSERT. This closes [#9958](https://github.com/ClickHouse/ClickHouse/issues/9958). [#25602](https://github.com/ClickHouse/ClickHouse/pull/25602) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible logical race condition between `ALTER TABLE ... DETACH` and background merges. [#25605](https://github.com/ClickHouse/ClickHouse/pull/25605) ([Azat Khuzhin](https://github.com/azat)). +* Support materialized and aliased columns in joins, close [#13274](https://github.com/ClickHouse/ClickHouse/issues/13274). [#25634](https://github.com/ClickHouse/ClickHouse/pull/25634) ([Vladimir C](https://github.com/vdimir)). +* MaterializeMySQL now supports `ENUM` data type. [#25676](https://github.com/ClickHouse/ClickHouse/pull/25676) ([Storozhuk Kostiantyn](https://github.com/sand6255)). +* Cancel already running merges in partition on `DROP PARTITION` and `TRUNCATE` for `ReplicatedMergeTree`. Resolves [#17151](https://github.com/ClickHouse/ClickHouse/issues/17151). [#25684](https://github.com/ClickHouse/ClickHouse/pull/25684) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use `Map` data type for key-value dictionaries in system logs tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). Virtual columns are created to support old queries. Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). Authors @hexiaoting, @sundy-li. [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix inconsistent behaviour of GROUP BY constant on empty set. Closes [#6842](https://github.com/ClickHouse/ClickHouse/issues/6842). [#25786](https://github.com/ClickHouse/ClickHouse/pull/25786) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* MySQL Engine now supports the exchange of column comments between MySQL and ClickHouse. [#25795](https://github.com/ClickHouse/ClickHouse/pull/25795) ([Storozhuk Kostiantyn](https://github.com/sand6255)). +* Fix "No available columns" for Merge() storage. [#25801](https://github.com/ClickHouse/ClickHouse/pull/25801) ([Azat Khuzhin](https://github.com/azat)). +* Allow to start clickhouse-client with unreadable working directory. [#25817](https://github.com/ClickHouse/ClickHouse/pull/25817) ([ianton-ru](https://github.com/ianton-ru)). +* Better handling of lost parts for ReplicatedMergeTree tables. Fixes rare inconsistencies in ReplicationQueue. Nothing should be visible to the user. Fixes [#10368](https://github.com/ClickHouse/ClickHouse/issues/10368). [#25820](https://github.com/ClickHouse/ClickHouse/pull/25820) ([alesapin](https://github.com/alesapin)). +* Fix an extremely rare bug which can lead to intersecting parts after `DROP PART` or background deletion of an empty part. [#25884](https://github.com/ClickHouse/ClickHouse/pull/25884) ([alesapin](https://github.com/alesapin)). +* Convert history file from readline format to replxx format. [#25888](https://github.com/ClickHouse/ClickHouse/pull/25888) ([Azat Khuzhin](https://github.com/azat)). +* Support LowCardinality, Decimal and UUID for JSON extract. Closes [#24606](https://github.com/ClickHouse/ClickHouse/issues/24606). [#25900](https://github.com/ClickHouse/ClickHouse/pull/25900) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add support for queries with a column named `"null"` (it must be specified in backticks or double quotes) and ON CLUSTER. This closes [#24035](https://github.com/ClickHouse/ClickHouse/issues/24035). [#25907](https://github.com/ClickHouse/ClickHouse/pull/25907) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly throw exception on attempt to parse invalid Date. This closes [#6481](https://github.com/ClickHouse/ClickHouse/issues/6481). 
[#25909](https://github.com/ClickHouse/ClickHouse/pull/25909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow parameters for parametric aggregate functions to be arbitrary constant expressions (e.g. `1 + 2`), not just literals. This also allows to use query parameters (in parametrized queries like `{param:UInt8}`) for parameters of parametric aggregate functions. This closes [#11607](https://github.com/ClickHouse/ClickHouse/issues/11607). [#25910](https://github.com/ClickHouse/ClickHouse/pull/25910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow `quantiles*` functions to work with `aggregate_functions_null_for_empty`. This closes [#25892](https://github.com/ClickHouse/ClickHouse/issues/25892). [#25919](https://github.com/ClickHouse/ClickHouse/pull/25919) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `sudo service clickhouse-server start` to work on systems with `systemd` like Centos 8. This closes [#14298](https://github.com/ClickHouse/ClickHouse/issues/14298). This closes [#17799](https://github.com/ClickHouse/ClickHouse/issues/17799). [#25921](https://github.com/ClickHouse/ClickHouse/pull/25921) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for unicode (e.g. Chinese, Cyrillic) components in `Nested` data types. This closes [#25594](https://github.com/ClickHouse/ClickHouse/issues/25594). [#25923](https://github.com/ClickHouse/ClickHouse/pull/25923) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow complex quoted identifiers of JOINed tables. This closes [#17861](https://github.com/ClickHouse/ClickHouse/issues/17861). [#25924](https://github.com/ClickHouse/ClickHouse/pull/25924) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added setting `optimize_move_to_prewhere_if_final`. If query has `FINAL`, the optimization `move_to_prewhere` will be enabled only if both `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are enabled. 
Closes [#8684](https://github.com/ClickHouse/ClickHouse/issues/8684). [#25940](https://github.com/ClickHouse/ClickHouse/pull/25940) ([Kseniia Sumarokova](https://github.com/kssenii)). +* More instrumentation for network interaction: add counters for recv/send bytes; add gauges for recvs/sends. Added missing documentation. This closes [#5897](https://github.com/ClickHouse/ClickHouse/issues/5897). [#25962](https://github.com/ClickHouse/ClickHouse/pull/25962) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Web UI: if value looks like an URL, automatically generate a link. [#25965](https://github.com/ClickHouse/ClickHouse/pull/25965) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error with query `SET SQL_SELECT_LIMIT` in mysql protocol. Closes [#17115](https://github.com/ClickHouse/ClickHouse/issues/17115). [#25972](https://github.com/ClickHouse/ClickHouse/pull/25972) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add support for argument of `UUID` type for `empty` and `notEmpty` functions. `UUID` is empty if it is all zeros (nil UUID). This closes [#3446](https://github.com/ClickHouse/ClickHouse/issues/3446). [#25974](https://github.com/ClickHouse/ClickHouse/pull/25974) ([zhaoyu](https://github.com/zxc111)). +* Add support for argument of AggregateFunction type for bin and hex functions. [#26094](https://github.com/ClickHouse/ClickHouse/pull/26094) ([zhaoyu](https://github.com/zxc111)). +* For dictionary with complex key if complex key contains only one attribute allow to not wrap key expression in tuple for functions `dictGet`, `dictHas`. [#26130](https://github.com/ClickHouse/ClickHouse/pull/26130) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Bug Fix +* `CAST` from `Date` to `DateTime` (or `DateTime64`) was not using the timezone of the `DateTime` type. It can also affect the comparison between `Date` and `DateTime`. 
Inference of the common type for `Date` and `DateTime` also was not using the corresponding timezone. It affected the results of function `if` and array construction. Closes [#24128](https://github.com/ClickHouse/ClickHouse/issues/24128). [#24129](https://github.com/ClickHouse/ClickHouse/pull/24129) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix assertion in PREWHERE with non-uint8 type, close [#19589](https://github.com/ClickHouse/ClickHouse/issues/19589). [#25484](https://github.com/ClickHouse/ClickHouse/pull/25484) ([Vladimir C](https://github.com/vdimir)). +* Fix `ALTER MODIFY COLUMN` of columns, which participates in TTL expressions. [#25554](https://github.com/ClickHouse/ClickHouse/pull/25554) ([Anton Popov](https://github.com/CurtizJ)). +* Fix slow dict join in some cases, close [#24209](https://github.com/ClickHouse/ClickHouse/issues/24209). [#25618](https://github.com/ClickHouse/ClickHouse/pull/25618) ([Vladimir C](https://github.com/vdimir)). +* Allow StorageMerge to access tables with aliases. Closes [#6051](https://github.com/ClickHouse/ClickHouse/issues/6051). [#25694](https://github.com/ClickHouse/ClickHouse/pull/25694) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in `TTL` with `GROUP BY` expression which refuses to execute `TTL` after first execution in part. [#25743](https://github.com/ClickHouse/ClickHouse/pull/25743) ([alesapin](https://github.com/alesapin)). +* Fix rare bug with `DROP PART` query for `ReplicatedMergeTree` tables which can lead to error message `Unexpected merged part intersecting drop range`. [#25783](https://github.com/ClickHouse/ClickHouse/pull/25783) ([alesapin](https://github.com/alesapin)). +* Fix ARM exception handling with non default page size. Fixes [#25512](https://github.com/ClickHouse/ClickHouse/issues/25512). Fixes [#25044](https://github.com/ClickHouse/ClickHouse/issues/25044). Fixes [#24901](https://github.com/ClickHouse/ClickHouse/issues/24901). 
Fixes [#23183](https://github.com/ClickHouse/ClickHouse/issues/23183). Fixes [#20221](https://github.com/ClickHouse/ClickHouse/issues/20221). Fixes [#19703](https://github.com/ClickHouse/ClickHouse/issues/19703). Fixes [#19028](https://github.com/ClickHouse/ClickHouse/issues/19028). Fixes [#18391](https://github.com/ClickHouse/ClickHouse/issues/18391). Fixes [#18121](https://github.com/ClickHouse/ClickHouse/issues/18121). Fixes [#17994](https://github.com/ClickHouse/ClickHouse/issues/17994). Fixes [#12483](https://github.com/ClickHouse/ClickHouse/issues/12483). [#25854](https://github.com/ClickHouse/ClickHouse/pull/25854) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix extremely long backoff for background tasks when the background pool is full. Fixes [#25836](https://github.com/ClickHouse/ClickHouse/issues/25836). [#25893](https://github.com/ClickHouse/ClickHouse/pull/25893) ([alesapin](https://github.com/alesapin)). +* Fixed `scram-sha-256` authentication for PostgreSQL engines. Closes [#24516](https://github.com/ClickHouse/ClickHouse/issues/24516). [#25906](https://github.com/ClickHouse/ClickHouse/pull/25906) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash on call dictGet() with bad arguments. [#25913](https://github.com/ClickHouse/ClickHouse/pull/25913) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible deadlock during query profiler stack unwinding. Fixes [#25968](https://github.com/ClickHouse/ClickHouse/issues/25968). [#25970](https://github.com/ClickHouse/ClickHouse/pull/25970) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix formatting of type `Map` with integer keys to `JSON`. [#25982](https://github.com/ClickHouse/ClickHouse/pull/25982) ([Anton Popov](https://github.com/CurtizJ)). +* Fix wrong thread estimation for right subquery join in some cases. Close [#24075](https://github.com/ClickHouse/ClickHouse/issues/24075). 
[#26052](https://github.com/ClickHouse/ClickHouse/pull/26052) ([Vladimir C](https://github.com/vdimir)). +* Fix rare server crash because of `abort` in ZooKeeper client. Fixes [#25813](https://github.com/ClickHouse/ClickHouse/issues/25813). [#26079](https://github.com/ClickHouse/ClickHouse/pull/26079) ([alesapin](https://github.com/alesapin)). +* Fix throwing exception when iterating over a non-existing remote directory. [#26087](https://github.com/ClickHouse/ClickHouse/pull/26087) ([ianton-ru](https://github.com/ianton-ru)). +* Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `joinGet` with LowCardinality columns, close [#25993](https://github.com/ClickHouse/ClickHouse/issues/25993). [#26118](https://github.com/ClickHouse/ClickHouse/pull/26118) ([Vladimir C](https://github.com/vdimir)). + +#### Build/Testing/Packaging Improvement +* - Syntax update: changing 'is' to '==' when necessary. [#25559](https://github.com/ClickHouse/ClickHouse/pull/25559) ([MyroTk](https://github.com/MyroTk)). +* Add new tests for checking access rights for columns used in filters (WHERE / PREWHERE / row policy) of the `SELECT` statement after changes in https://github.com/ClickHouse/ClickHouse/pull/24405. [#25619](https://github.com/ClickHouse/ClickHouse/pull/25619) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enabling all TestFlows modules and fixing some tests. [#26011](https://github.com/ClickHouse/ClickHouse/pull/26011) ([vzakaznikov](https://github.com/vzakaznikov)). +* Disabling TestFlows LDAP module due to test fails. [#26065](https://github.com/ClickHouse/ClickHouse/pull/26065) ([vzakaznikov](https://github.com/vzakaznikov)). + +#### Other +* Add `clickhouse-keeper-converter` tool which allows converting zookeeper logs and snapshots into `clickhouse-keeper` snapshot format.
[#25428](https://github.com/ClickHouse/ClickHouse/pull/25428) ([alesapin](https://github.com/alesapin)). + +#### NO CL ENTRY + +* NO CL ENTRY: '[ImgBot] Optimize images'. [#26054](https://github.com/ClickHouse/ClickHouse/pull/26054) ([imgbot[bot]](https://github.com/apps/imgbot)). + diff --git a/docs/changelogs/v21.8.10.19-lts.md b/docs/changelogs/v21.8.10.19-lts.md new file mode 100644 index 00000000000..92347642ffa --- /dev/null +++ b/docs/changelogs/v21.8.10.19-lts.md @@ -0,0 +1,12 @@ +### ClickHouse release v21.8.10.19-lts FIXME as compared to v21.8.9.13-lts + +#### Improvement +* Backported in [#30452](https://github.com/ClickHouse/ClickHouse/issues/30452): Allow symlinks to files in user_files directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29724](https://github.com/ClickHouse/ClickHouse/issues/29724): Fix null dereference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30233](https://github.com/ClickHouse/ClickHouse/issues/30233): Fix INSERT SELECT incorrectly fills MATERIALIZED column based on Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30333](https://github.com/ClickHouse/ClickHouse/issues/30333): Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#30377](https://github.com/ClickHouse/ClickHouse/issues/30377): fix replaceRegexpAll bug. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)).
+ diff --git a/docs/changelogs/v21.8.11.4-lts.md b/docs/changelogs/v21.8.11.4-lts.md new file mode 100644 index 00000000000..e36d75c32ea --- /dev/null +++ b/docs/changelogs/v21.8.11.4-lts.md @@ -0,0 +1,40 @@ +### ClickHouse release v21.8.11.4-lts FIXME as compared to v21.8.10.19-lts + +#### New Feature +* Backported in [#30713](https://github.com/ClickHouse/ClickHouse/issues/30713): CompiledExpressionCache limit elements size using `compiled_expression_cache_elements_size` setting. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Improvement +* Backported in [#30621](https://github.com/ClickHouse/ClickHouse/issues/30621): Make the query that fetches the table structure for a PostgreSQL database more reliable. [#30477](https://github.com/ClickHouse/ClickHouse/pull/30477) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#31368](https://github.com/ClickHouse/ClickHouse/issues/31368): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#30914](https://github.com/ClickHouse/ClickHouse/issues/30914): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28756](https://github.com/ClickHouse/ClickHouse/issues/28756): Fix NOT-IN index optimization when not all key columns are used. This fixes [#28120](https://github.com/ClickHouse/ClickHouse/issues/28120).
[#28315](https://github.com/ClickHouse/ClickHouse/pull/28315) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#30825](https://github.com/ClickHouse/ClickHouse/issues/30825): Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29175](https://github.com/ClickHouse/ClickHouse/issues/29175): Fix queries to external databases (i.e. MySQL) with multiple columns in IN ( i.e. `(k,v) IN ((1, 2))` ) (but note that this has some backward incompatibility for the `clickhouse-copier` since it uses alias for tuple element). [#28888](https://github.com/ClickHouse/ClickHouse/pull/28888) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30610](https://github.com/ClickHouse/ClickHouse/issues/30610): Fix bad optimizations of ORDER BY if it contains WITH FILL. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049). [#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#30767](https://github.com/ClickHouse/ClickHouse/issues/30767): Fix hanging DDL queries on Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#30205](https://github.com/ClickHouse/ClickHouse/issues/30205): Fix data-race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30463](https://github.com/ClickHouse/ClickHouse/issues/30463): Support nullable arguments in function `initializeAggregation`. 
[#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#30655](https://github.com/ClickHouse/ClickHouse/issues/30655): Fix `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#30289](https://github.com/ClickHouse/ClickHouse/issues/30289): Fix ComplexKeyHashedDictionary, ComplexKeySparseHashedDictionary parsing `preallocate` option from layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#30525](https://github.com/ClickHouse/ClickHouse/issues/30525): Fixed segfault which might happen if session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix exception handling in `parallel_view_processing`. That resolves issues / prevents crashes in some rare corner cases, when that feature is enabled and exception (like "Memory limit exceeded ...") happened in the middle of materialized view processing. [#30472](https://github.com/ClickHouse/ClickHouse/pull/30472) ([filimonov](https://github.com/filimonov)). +* Backported in [#30585](https://github.com/ClickHouse/ClickHouse/issues/30585): Fix deadlock on ALTER with scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#30607](https://github.com/ClickHouse/ClickHouse/issues/30607): Limit push down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438).
[#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#30753](https://github.com/ClickHouse/ClickHouse/issues/30753): Functions for case-insensitive search in UTF8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually does not match, it's fixed. [#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30710](https://github.com/ClickHouse/ClickHouse/issues/30710): Fix PREWHERE with WHERE in case of always true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30770](https://github.com/ClickHouse/ClickHouse/issues/30770): Fixed a race condition between `REPLACE/MOVE PARTITION` and background merge in non-replicated `MergeTree` that might cause a part of moved/replaced data to remain in partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30857](https://github.com/ClickHouse/ClickHouse/issues/30857): Fixed ambiguity when extracting auxiliary ZooKeeper name from ZooKeeper path in `ReplicatedMergeTree`. Previously server might fail to start with `Unknown auxiliary ZooKeeper name` if ZooKeeper path contains a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). Also it was allowed to specify ZooKeeper path that does not start with slash, but now it's deprecated and creation of new tables with such path is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed too. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#30925](https://github.com/ClickHouse/ClickHouse/issues/30925): Fix set index not used in AND/OR expressions when there are more than two operands. This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416) . [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#31151](https://github.com/ClickHouse/ClickHouse/issues/31151): Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Backported in [#31040](https://github.com/ClickHouse/ClickHouse/issues/31040): Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31132](https://github.com/ClickHouse/ClickHouse/issues/31132): Fix JSONValue/Query with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31372](https://github.com/ClickHouse/ClickHouse/issues/31372): Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+ diff --git a/docs/changelogs/v21.8.12.29-lts.md b/docs/changelogs/v21.8.12.29-lts.md new file mode 100644 index 00000000000..63b34c367f3 --- /dev/null +++ b/docs/changelogs/v21.8.12.29-lts.md @@ -0,0 +1,24 @@ +### ClickHouse release v21.8.12.29-lts FIXME as compared to v21.8.11.4-lts + +#### Performance Improvement +* Backported in [#31732](https://github.com/ClickHouse/ClickHouse/issues/31732): Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#31575](https://github.com/ClickHouse/ClickHouse/issues/31575): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#31204](https://github.com/ClickHouse/ClickHouse/issues/31204): Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31253](https://github.com/ClickHouse/ClickHouse/issues/31253): Fix bug in Keeper which can lead to inability to start when some coordination logs was lost and we have more fresh snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Backported in [#31521](https://github.com/ClickHouse/ClickHouse/issues/31521): Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). 
+* Backported in [#31552](https://github.com/ClickHouse/ClickHouse/issues/31552): Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#31582](https://github.com/ClickHouse/ClickHouse/issues/31582): * Disable `partial_merge_join_left_table_buffer_bytes` before bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). * Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#31599](https://github.com/ClickHouse/ClickHouse/issues/31599): Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#31779](https://github.com/ClickHouse/ClickHouse/issues/31779): `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31790](https://github.com/ClickHouse/ClickHouse/issues/31790): Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31829](https://github.com/ClickHouse/ClickHouse/issues/31829): Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database.
[#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31813](https://github.com/ClickHouse/ClickHouse/issues/31813): Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31761](https://github.com/ClickHouse/ClickHouse/issues/31761): Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#31890](https://github.com/ClickHouse/ClickHouse/issues/31890): Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31910](https://github.com/ClickHouse/ClickHouse/issues/31910): Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v21.8.13.6-lts.md b/docs/changelogs/v21.8.13.6-lts.md new file mode 100644 index 00000000000..06e8c366ff2 --- /dev/null +++ b/docs/changelogs/v21.8.13.6-lts.md @@ -0,0 +1,29 @@ +### ClickHouse release v21.8.13.6-lts FIXME as compared to v21.8.12.29-lts + +#### Bug Fix +* Backported in [#32688](https://github.com/ClickHouse/ClickHouse/issues/32688): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). 
[#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). +* Backported in [#32343](https://github.com/ClickHouse/ClickHouse/issues/32343): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#32108](https://github.com/ClickHouse/ClickHouse/issues/32108): Fix crash with empty result on odbc query. Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#32150](https://github.com/ClickHouse/ClickHouse/issues/32150): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32075](https://github.com/ClickHouse/ClickHouse/issues/32075): Fix a bug about function transform with decimal args. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([Shuai li](https://github.com/loneylee)). +* Backported in [#31955](https://github.com/ClickHouse/ClickHouse/issues/31955): - Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler` - Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). 
+* Backported in [#32161](https://github.com/ClickHouse/ClickHouse/issues/32161): Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32146](https://github.com/ClickHouse/ClickHouse/issues/32146): Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32213](https://github.com/ClickHouse/ClickHouse/issues/32213): Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32312](https://github.com/ClickHouse/ClickHouse/issues/32312): XML dictionaries identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#32540](https://github.com/ClickHouse/ClickHouse/issues/32540): Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). 
[#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#32353](https://github.com/ClickHouse/ClickHouse/issues/32353): Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#33048](https://github.com/ClickHouse/ClickHouse/issues/33048): Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#32795](https://github.com/ClickHouse/ClickHouse/issues/32795): fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release): + +* Backported in [#32659](https://github.com/ClickHouse/ClickHouse/issues/32659): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'fix json error after downgrade'. [#33166](https://github.com/ClickHouse/ClickHouse/pull/33166) ([bullet1337](https://github.com/bullet1337)). 
+ diff --git a/docs/changelogs/v21.8.14.5-lts.md b/docs/changelogs/v21.8.14.5-lts.md new file mode 100644 index 00000000000..481327a35c9 --- /dev/null +++ b/docs/changelogs/v21.8.14.5-lts.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.8.14.5-lts FIXME as compared to v21.8.13.6-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#33184](https://github.com/ClickHouse/ClickHouse/issues/33184): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#33659](https://github.com/ClickHouse/ClickHouse/issues/33659): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#33206](https://github.com/ClickHouse/ClickHouse/issues/33206): Fix s3 table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v21.8.15.7-lts.md b/docs/changelogs/v21.8.15.7-lts.md new file mode 100644 index 00000000000..7411fbff9ae --- /dev/null +++ b/docs/changelogs/v21.8.15.7-lts.md @@ -0,0 +1,7 @@ +### ClickHouse release v21.8.15.7-lts FIXME as compared to v21.8.14.5-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#34121](https://github.com/ClickHouse/ClickHouse/issues/34121): Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. 
[#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#34097](https://github.com/ClickHouse/ClickHouse/issues/34097): Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v21.8.2.19-prestable.md b/docs/changelogs/v21.8.2.19-prestable.md new file mode 100644 index 00000000000..15726747e65 --- /dev/null +++ b/docs/changelogs/v21.8.2.19-prestable.md @@ -0,0 +1,29 @@ +### ClickHouse release v21.8.2.19-prestable FIXME as compared to v21.8.1.7409-prestable + +#### Performance Improvement +* Backported in [#26524](https://github.com/ClickHouse/ClickHouse/issues/26524): Improve latency of short queries, that require reading from tables with large number of columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#26283](https://github.com/ClickHouse/ClickHouse/issues/26283): Fix `optimize_skip_unused_shards_rewrite_in` for non-UInt64 types (may select incorrect shards eventually or throw `Cannot infer type of an empty tuple` or `Function tuple requires at least one argument`). [#25798](https://github.com/ClickHouse/ClickHouse/pull/25798) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#26587](https://github.com/ClickHouse/ClickHouse/issues/26587): Fixed `Not found column ...` and `Missing column ...` errors when selecting from `MaterializeMySQL`. Fixes [#23708](https://github.com/ClickHouse/ClickHouse/issues/23708), [#24830](https://github.com/ClickHouse/ClickHouse/issues/24830), [#25794](https://github.com/ClickHouse/ClickHouse/issues/25794). [#25822](https://github.com/ClickHouse/ClickHouse/pull/25822) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#26190](https://github.com/ClickHouse/ClickHouse/issues/26190): Fix sharding_key from column w/o function for remote() (before `select * from remote('127.1', system.one, dummy)` leads to `Unknown column: dummy, there are only columns .` error). [#25824](https://github.com/ClickHouse/ClickHouse/pull/25824) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#26284](https://github.com/ClickHouse/ClickHouse/issues/26284): Fix possible mismatched header when using normal projection with prewhere. This fixes [#26020](https://github.com/ClickHouse/ClickHouse/issues/26020). [#26038](https://github.com/ClickHouse/ClickHouse/pull/26038) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#26298](https://github.com/ClickHouse/ClickHouse/issues/26298): Fixed incorrect `sequence_id` in MySQL protocol packets that ClickHouse sends on exception during query execution. It might cause MySQL client to reset connection to ClickHouse server. Fixes [#21184](https://github.com/ClickHouse/ClickHouse/issues/21184). [#26051](https://github.com/ClickHouse/ClickHouse/pull/26051) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26204](https://github.com/ClickHouse/ClickHouse/issues/26204): Fix potential crash if more than one `untuple` expression is used. [#26179](https://github.com/ClickHouse/ClickHouse/pull/26179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#26228](https://github.com/ClickHouse/ClickHouse/issues/26228): Remove excessive newline in `thread_name` column in `system.stack_trace` table. This fixes [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#26358](https://github.com/ClickHouse/ClickHouse/issues/26358): Fix logical error on join with totals, close [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). 
[#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26788](https://github.com/ClickHouse/ClickHouse/issues/26788): Fix zstd decompression in case there are escape sequences at the end of internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). [#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#26360](https://github.com/ClickHouse/ClickHouse/issues/26360): Fixed rare bug in lost replica recovery that may cause replicas to diverge. [#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26452](https://github.com/ClickHouse/ClickHouse/issues/26452): Fix `optimize_distributed_group_by_sharding_key` for multiple columns (leads to incorrect result w/ `optimize_skip_unused_shards=1`/`allow_nondeterministic_optimize_skip_unused_shards=1` and multiple columns in sharding key expression). [#26353](https://github.com/ClickHouse/ClickHouse/pull/26353) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#26422](https://github.com/ClickHouse/ClickHouse/issues/26422): Fix possible crash when login as dropped user. This PR fixes [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#26417](https://github.com/ClickHouse/ClickHouse/issues/26417): Fix infinite non joined block stream in `partial_merge_join` close [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#26448](https://github.com/ClickHouse/ClickHouse/issues/26448): Fix some fuzzed msan crash. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). 
[#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#26488](https://github.com/ClickHouse/ClickHouse/issues/26488): Fix broken name resolution after rewriting column aliases. This fixes [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#26613](https://github.com/ClickHouse/ClickHouse/issues/26613): Fix issues with `CREATE DICTIONARY` query if dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#26605](https://github.com/ClickHouse/ClickHouse/issues/26605): Fix crash in rabbitmq shutdown in case rabbitmq setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#26608](https://github.com/ClickHouse/ClickHouse/issues/26608): Update `chown` cmd check in clickhouse-server docker entrypoint. It fixes the bug that cluster pod restart failed (or timeout) on kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)). +* Backported in [#26648](https://github.com/ClickHouse/ClickHouse/issues/26648): Fix incorrect function names of groupBitmapAnd/Or/Xor. This fixes. [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#26644](https://github.com/ClickHouse/ClickHouse/issues/26644): Fix history file conversion if file is empty. [#26589](https://github.com/ClickHouse/ClickHouse/pull/26589) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#26705](https://github.com/ClickHouse/ClickHouse/issues/26705): Fix potential nullptr dereference in window functions. This fixes [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). [#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#26771](https://github.com/ClickHouse/ClickHouse/issues/26771): Sometimes SET ROLE could work incorrectly, this PR fixes that. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)). + diff --git a/docs/changelogs/v21.8.3.44-lts.md b/docs/changelogs/v21.8.3.44-lts.md new file mode 100644 index 00000000000..21fe655870a --- /dev/null +++ b/docs/changelogs/v21.8.3.44-lts.md @@ -0,0 +1,28 @@ +### ClickHouse release v21.8.3.44-lts FIXME as compared to v21.8.2.19-prestable + +#### Improvement +* Rename setting. [#26844](https://github.com/ClickHouse/ClickHouse/pull/26844) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix +* Backported in [#27545](https://github.com/ClickHouse/ClickHouse/issues/27545): Now, scalar subquery always returns `Nullable` result if its type can be `Nullable`. It is needed because in case of empty subquery its result should be `Null`. Previously, it was possible to get error about incompatible types (type deduction does not execute scalar subquery, and it could use not-nullable type). Scalar subquery with empty result which can't be converted to `Nullable` (like `Array` or `Tuple`) now throws error. Fixes [#25411](https://github.com/ClickHouse/ClickHouse/issues/25411). [#26423](https://github.com/ClickHouse/ClickHouse/pull/26423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#26856](https://github.com/ClickHouse/ClickHouse/issues/26856): ParallelFormattingOutputFormat: Use mutex to handle the join to the collector_thread (https://github.com/ClickHouse/ClickHouse/issues/26694).
[#26703](https://github.com/ClickHouse/ClickHouse/pull/26703) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#26941](https://github.com/ClickHouse/ClickHouse/issues/26941): Do not remove data on ReplicatedMergeTree table shutdown to avoid creating data to metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#26981](https://github.com/ClickHouse/ClickHouse/issues/26981): Aggregate function parameters might be lost when applying some combinators causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed. Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26909](https://github.com/ClickHouse/ClickHouse/issues/26909): Fix library-bridge ids load. [#26834](https://github.com/ClickHouse/ClickHouse/pull/26834) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#26945](https://github.com/ClickHouse/ClickHouse/issues/26945): Fix error `Missing columns: 'xxx'` when `DEFAULT` column references other non materialized column without `DEFAULT` expression. Fixes [#26591](https://github.com/ClickHouse/ClickHouse/issues/26591). [#26900](https://github.com/ClickHouse/ClickHouse/pull/26900) ([alesapin](https://github.com/alesapin)). +* Backported in [#26997](https://github.com/ClickHouse/ClickHouse/issues/26997): Fix reading of custom TLDs (stops processing with lower buffer or bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27030](https://github.com/ClickHouse/ClickHouse/issues/27030): Now partition ID in queries like `ALTER TABLE ... 
PARTITION ID xxx` validates for correctness. Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)). +* Backported in [#27053](https://github.com/ClickHouse/ClickHouse/issues/27053): [RFC] Fix possible mutation stack due to race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27106](https://github.com/ClickHouse/ClickHouse/issues/27106): Fixed `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache` configuration parsing. Options `allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds` were not parsed for dictionaries with non `cache` type. [#27032](https://github.com/ClickHouse/ClickHouse/pull/27032) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#27160](https://github.com/ClickHouse/ClickHouse/issues/27160): Fix synchronization in GRPCServer This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27365](https://github.com/ClickHouse/ClickHouse/issues/27365): - Fix uninitialized memory in functions `multiSearch*` with empty array, close [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). [#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#27263](https://github.com/ClickHouse/ClickHouse/issues/27263): In rare cases `system.detached_parts` table might contain incorrect information for some parts, it's fixed. Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#27555](https://github.com/ClickHouse/ClickHouse/issues/27555): Fix mutation stuck on invalid partitions in non-replicated MergeTree. [#27248](https://github.com/ClickHouse/ClickHouse/pull/27248) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27411](https://github.com/ClickHouse/ClickHouse/issues/27411): Fix `distributed_group_by_no_merge=2`+`distributed_push_down_limit=1` or `optimize_distributed_group_by_sharding_key=1` with `LIMIT BY` and `LIMIT OFFSET`. [#27249](https://github.com/ClickHouse/ClickHouse/pull/27249) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#27507](https://github.com/ClickHouse/ClickHouse/issues/27507): Fix errors like `Expected ColumnLowCardinality, gotUInt8` or `Bad cast from type DB::ColumnVector to DB::ColumnLowCardinality` for some queries with `LowCardinality` in `PREWHERE`. Fixes [#23515](https://github.com/ClickHouse/ClickHouse/issues/23515). [#27298](https://github.com/ClickHouse/ClickHouse/pull/27298) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27417](https://github.com/ClickHouse/ClickHouse/issues/27417): Fix `Cannot find column` error for queries with sampling. Was introduced in [#24574](https://github.com/ClickHouse/ClickHouse/issues/24574). Fixes [#26522](https://github.com/ClickHouse/ClickHouse/issues/26522). [#27301](https://github.com/ClickHouse/ClickHouse/pull/27301) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27414](https://github.com/ClickHouse/ClickHouse/issues/27414): Fixed incorrect validation of partition id for MergeTree tables that created with old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27540](https://github.com/ClickHouse/ClickHouse/issues/27540): Fix incorrect result for query with row-level security, prewhere and LowCardinality filter. 
Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179). [#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#27419](https://github.com/ClickHouse/ClickHouse/issues/27419): /proc/info contains metrics like. [#27361](https://github.com/ClickHouse/ClickHouse/pull/27361) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#27472](https://github.com/ClickHouse/ClickHouse/issues/27472): fix metric BackgroundMessageBrokerSchedulePoolTask, maybe mistyped. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)). + diff --git a/docs/changelogs/v21.8.4.51-lts.md b/docs/changelogs/v21.8.4.51-lts.md new file mode 100644 index 00000000000..a8494ebb1d1 --- /dev/null +++ b/docs/changelogs/v21.8.4.51-lts.md @@ -0,0 +1,11 @@ +### ClickHouse release v21.8.4.51-lts FIXME as compared to v21.8.3.44-lts + +#### Bug Fix +* Backported in [#27650](https://github.com/ClickHouse/ClickHouse/issues/27650): Fix crash during projection materialization when some parts contain missing columns. This fixes [#27512](https://github.com/ClickHouse/ClickHouse/issues/27512). [#27528](https://github.com/ClickHouse/ClickHouse/pull/27528) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#27625](https://github.com/ClickHouse/ClickHouse/issues/27625): Fixed underflow of the time value when constructing it from components. Closes [#27193](https://github.com/ClickHouse/ClickHouse/issues/27193). [#27605](https://github.com/ClickHouse/ClickHouse/pull/27605) ([Vasily Nemkov](https://github.com/Enmk)). +* Backported in [#27779](https://github.com/ClickHouse/ClickHouse/issues/27779): Fix bug with aliased column in `Distributed` table. [#27652](https://github.com/ClickHouse/ClickHouse/pull/27652) ([Vladimir C](https://github.com/vdimir)).
+* Backported in [#27713](https://github.com/ClickHouse/ClickHouse/issues/27713): Fixed another case of `Unexpected merged part ... intersecting drop range ...` error. [#27656](https://github.com/ClickHouse/ClickHouse/pull/27656) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27673](https://github.com/ClickHouse/ClickHouse/issues/27673): Fix postgresql table function resulting in non-closing connections. Closes [#26088](https://github.com/ClickHouse/ClickHouse/issues/26088). [#27662](https://github.com/ClickHouse/ClickHouse/pull/27662) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#27700](https://github.com/ClickHouse/ClickHouse/issues/27700): Fix bad type cast when functions like `arrayHas` are applied to arrays of LowCardinality of Nullable of different non-numeric types like `DateTime` and `DateTime64`. In previous versions bad cast occurs. In new version it will lead to exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#27747](https://github.com/ClickHouse/ClickHouse/issues/27747): Remove duplicated source files in CMakeLists.txt in arrow-cmake. [#27736](https://github.com/ClickHouse/ClickHouse/pull/27736) ([李扬](https://github.com/taiyang-li)). + diff --git a/docs/changelogs/v21.8.5.7-lts.md b/docs/changelogs/v21.8.5.7-lts.md new file mode 100644 index 00000000000..d78eb98b472 --- /dev/null +++ b/docs/changelogs/v21.8.5.7-lts.md @@ -0,0 +1,31 @@ +### ClickHouse release v21.8.5.7-lts FIXME as compared to v21.8.4.51-lts + +#### Improvement +* Backported in [#27893](https://github.com/ClickHouse/ClickHouse/issues/27893): Allow symlinks for library dictionary path. [#27815](https://github.com/ClickHouse/ClickHouse/pull/27815) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Backported in [#28154](https://github.com/ClickHouse/ClickHouse/issues/28154): Use Multipart copy upload for large S3 objects. [#27858](https://github.com/ClickHouse/ClickHouse/pull/27858) ([ianton-ru](https://github.com/ianton-ru)). +* Backported in [#28463](https://github.com/ClickHouse/ClickHouse/issues/28463): Do not allow creating StorageMaterializedPostgreSQL with bad arguments. Closes [#28423](https://github.com/ClickHouse/ClickHouse/issues/28423). [#28430](https://github.com/ClickHouse/ClickHouse/pull/28430) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#28033](https://github.com/ClickHouse/ClickHouse/issues/28033): Bugfix for windowFunnel's "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). [#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)). +* Backported in [#27782](https://github.com/ClickHouse/ClickHouse/issues/27782): Fix column filtering with union distinct in subquery. Closes [#27578](https://github.com/ClickHouse/ClickHouse/issues/27578). [#27689](https://github.com/ClickHouse/ClickHouse/pull/27689) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#27867](https://github.com/ClickHouse/ClickHouse/issues/27867): Prevent crashes for some formats when NULL (tombstone) message was coming from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)). +* Backported in [#28346](https://github.com/ClickHouse/ClickHouse/issues/28346): Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). +* Backported in [#27855](https://github.com/ClickHouse/ClickHouse/issues/27855): Fix a couple of bugs that may cause replicas to diverge. 
[#27808](https://github.com/ClickHouse/ClickHouse/pull/27808) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#27958](https://github.com/ClickHouse/ClickHouse/issues/27958): Fix selecting with extremes from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27953](https://github.com/ClickHouse/ClickHouse/issues/27953): Check cluster name before creating Distributed table, do not allow to create a table with incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). [#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28209](https://github.com/ClickHouse/ClickHouse/issues/28209): Fix cases, when read buffer fails with 'attempt to read after end of file'. Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([Filatenkov Artur](https://github.com/FArthur-cmd)). + +#### Build/Testing/Packaging Improvement +* Backported in [#28031](https://github.com/ClickHouse/ClickHouse/issues/28031): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#27974](https://github.com/ClickHouse/ClickHouse/issues/27974): Fix handling null value with type of Nullable(String) in function JSONExtract. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930) . This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . 
[#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28117](https://github.com/ClickHouse/ClickHouse/issues/28117): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#28180](https://github.com/ClickHouse/ClickHouse/issues/28180): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). +* Backported in [#28254](https://github.com/ClickHouse/ClickHouse/issues/28254): Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) triggered with `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)). +* Backported in [#28342](https://github.com/ClickHouse/ClickHouse/issues/28342): Fix a rare bug in `clickhouse-keeper` when the client can receive a watch response before request-response. [#28197](https://github.com/ClickHouse/ClickHouse/pull/28197) ([alesapin](https://github.com/alesapin)). +* Backported in [#28263](https://github.com/ClickHouse/ClickHouse/issues/28263): Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and extremes. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#28292](https://github.com/ClickHouse/ClickHouse/issues/28292): Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. 
[#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28402](https://github.com/ClickHouse/ClickHouse/issues/28402): Fix intersecting parts due to new part had been replaced with an empty part. [#28310](https://github.com/ClickHouse/ClickHouse/pull/28310) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.8.6.15-lts.md b/docs/changelogs/v21.8.6.15-lts.md new file mode 100644 index 00000000000..de38572d94a --- /dev/null +++ b/docs/changelogs/v21.8.6.15-lts.md @@ -0,0 +1,25 @@ +### ClickHouse release v21.8.6.15-lts FIXME as compared to v21.8.5.7-lts + +#### Improvement +* Backported in [#28899](https://github.com/ClickHouse/ClickHouse/issues/28899): Use real tmp file instead of predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28642](https://github.com/ClickHouse/ClickHouse/issues/28642): Fix strange sessions expiration logic in Keeper. Probably it should help in CI: https://clickhouse-test-reports.s3.yandex.net/0/6bd9b82141c98dcd7796fd9d08326831095ba519/stress_test_(debug).html#fail1. [#28519](https://github.com/ClickHouse/ClickHouse/pull/28519) ([alesapin](https://github.com/alesapin)). +* Backported in [#28718](https://github.com/ClickHouse/ClickHouse/issues/28718): To be added. Closes [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#28614](https://github.com/ClickHouse/ClickHouse/pull/28614) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#27923](https://github.com/ClickHouse/ClickHouse/issues/27923): Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#28753](https://github.com/ClickHouse/ClickHouse/issues/28753): Fix transformation of disjunctions chain to `IN` (controlled by settings `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28644](https://github.com/ClickHouse/ClickHouse/issues/28644): Fix rare case when changes of `clickhouse-keeper` settings may lead to lost logs and server hung. [#28360](https://github.com/ClickHouse/ClickHouse/pull/28360) ([alesapin](https://github.com/alesapin)). +* Backported in [#28508](https://github.com/ClickHouse/ClickHouse/issues/28508): Fix lack of quotes for table names in MaterializedPostgreSQL engine. Closes [#28316](https://github.com/ClickHouse/ClickHouse/issues/28316). [#28433](https://github.com/ClickHouse/ClickHouse/pull/28433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#28510](https://github.com/ClickHouse/ClickHouse/issues/28510): Fixed possible ZooKeeper watches leak on background processing of distributed DDL queue. Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28573](https://github.com/ClickHouse/ClickHouse/issues/28573): Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#28597](https://github.com/ClickHouse/ClickHouse/issues/28597): Fix `There is no subcolumn` error, while select from tables, which have `Nested` columns and scalar columns with dot in name and the same prefix as `Nested` (e.g. `n.id UInt32, n.arr1 Array(UInt64), n.arr2 Array(UInt64)`). [#28531](https://github.com/ClickHouse/ClickHouse/pull/28531) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28714](https://github.com/ClickHouse/ClickHouse/issues/28714): Add Settings.Names, Settings.Values aliases for system.processes table. [#28685](https://github.com/ClickHouse/ClickHouse/pull/28685) ([Vitaly Orlov](https://github.com/orloffv)). +* Backported in [#28741](https://github.com/ClickHouse/ClickHouse/issues/28741): Fix the coredump in the creation of distributed tables, when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)). +* Backported in [#28791](https://github.com/ClickHouse/ClickHouse/issues/28791): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). +* Backported in [#28994](https://github.com/ClickHouse/ClickHouse/issues/28994): Fixed a race condition between `DROP PART` and `REPLACE/MOVE PARTITION` that might cause replicas to diverge in rare cases. [#28864](https://github.com/ClickHouse/ClickHouse/pull/28864) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28948](https://github.com/ClickHouse/ClickHouse/issues/28948): Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#28930](https://github.com/ClickHouse/ClickHouse/issues/28930): Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.8.7.22-lts.md b/docs/changelogs/v21.8.7.22-lts.md new file mode 100644 index 00000000000..92ba59f13cd --- /dev/null +++ b/docs/changelogs/v21.8.7.22-lts.md @@ -0,0 +1,8 @@ +### ClickHouse release v21.8.7.22-lts FIXME as compared to v21.8.6.15-lts + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29121](https://github.com/ClickHouse/ClickHouse/issues/29121): Better check for connection usability and also catch any exception in RabbitMQ shutdown just in case. [#28797](https://github.com/ClickHouse/ClickHouse/pull/28797) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29027](https://github.com/ClickHouse/ClickHouse/issues/29027): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in single threads since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29193](https://github.com/ClickHouse/ClickHouse/issues/29193): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). 
+ diff --git a/docs/changelogs/v21.8.8.29-lts.md b/docs/changelogs/v21.8.8.29-lts.md new file mode 100644 index 00000000000..199be63424c --- /dev/null +++ b/docs/changelogs/v21.8.8.29-lts.md @@ -0,0 +1,13 @@ +### ClickHouse release v21.8.8.29-lts FIXME as compared to v21.8.7.22-lts + +#### Bug Fix +* Backported in [#29128](https://github.com/ClickHouse/ClickHouse/issues/29128): Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization. [#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29262](https://github.com/ClickHouse/ClickHouse/issues/29262): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#29106](https://github.com/ClickHouse/ClickHouse/issues/29106): Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29357](https://github.com/ClickHouse/ClickHouse/issues/29357): Fix possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of table columns have default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#29447](https://github.com/ClickHouse/ClickHouse/issues/29447): Fix failed assertion in ReadBufferFromHDFS. Update libhdfs3 library to be able to run in tests in debug. Closes [#29251](https://github.com/ClickHouse/ClickHouse/issues/29251). Closes [#27814](https://github.com/ClickHouse/ClickHouse/issues/27814). 
[#29276](https://github.com/ClickHouse/ClickHouse/pull/29276) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29303](https://github.com/ClickHouse/ClickHouse/issues/29303): Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v21.8.9.13-lts.md b/docs/changelogs/v21.8.9.13-lts.md new file mode 100644 index 00000000000..e4cd5f45b9b --- /dev/null +++ b/docs/changelogs/v21.8.9.13-lts.md @@ -0,0 +1,31 @@ +### ClickHouse release v21.8.9.13-lts FIXME as compared to v21.8.8.29-lts + +#### Improvement +* Backported in [#29941](https://github.com/ClickHouse/ClickHouse/issues/29941): Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#29817](https://github.com/ClickHouse/ClickHouse/issues/29817): Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns=0`:. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#29973](https://github.com/ClickHouse/ClickHouse/issues/29973): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29676](https://github.com/ClickHouse/ClickHouse/issues/29676): Fix vertical merges of projection parts. 
This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253) . This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#29538](https://github.com/ClickHouse/ClickHouse/issues/29538): Fix possible `Block structure mismatch` for subqueries with pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29589](https://github.com/ClickHouse/ClickHouse/issues/29589): In ODBC bridge add retries for error Invalid cursor state. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29593](https://github.com/ClickHouse/ClickHouse/issues/29593): Fix bug in check `pathStartsWith` because there was a bug with the usage of `std::mismatch`: ` The behavior is undefined if the second range is shorter than the first range.`. [#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29628](https://github.com/ClickHouse/ClickHouse/issues/29628): Fix rare segfault in `ALTER MODIFY` query when using incorrect table identifier in `DEFAULT` expression like `x.y.z...` Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)). +* Backported in [#29752](https://github.com/ClickHouse/ClickHouse/issues/29752): Condition in filter predicate could be lost after push-down optimisation. 
[#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29840](https://github.com/ClickHouse/ClickHouse/issues/29840): Fixed incorrect behaviour of setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29847](https://github.com/ClickHouse/ClickHouse/issues/29847): Fix concurrent access to `LowCardinality` during `GROUP BY` (leads to SIGSEGV). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29911](https://github.com/ClickHouse/ClickHouse/issues/29911): Fix bad cast in `ATTACH TABLE ... FROM 'path'` query when non-string literal is used instead of path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#29867](https://github.com/ClickHouse/ClickHouse/issues/29867): Avoid `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in kernel. Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). [#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#29877](https://github.com/ClickHouse/ClickHouse/issues/29877): Fix system tables recreation check (fails to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30210](https://github.com/ClickHouse/ClickHouse/issues/30210): Fix possible data-race between `FileChecker` and `StorageLog`/`StorageStripeLog`. 
[#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30069](https://github.com/ClickHouse/ClickHouse/issues/30069): Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)). +* Backported in [#30125](https://github.com/ClickHouse/ClickHouse/issues/30125): Dropped `Memory` database might reappear after server restart, it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). Also added `force_remove_data_recursively_on_drop` setting as a workaround for `Directory not empty` error when dropping `Ordinary` database (because it's not possible to remove data leftovers manually in cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30263](https://github.com/ClickHouse/ClickHouse/issues/30263): FlatDictionary, HashedDictionary fix bytes_allocated calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NO CL CATEGORY + +* Avoid deadlocks when reading and writing on JOIN Engine tables at the same time. [#30187](https://github.com/ClickHouse/ClickHouse/pull/30187) ([Raúl Marín](https://github.com/Algunenano)). + diff --git a/docs/changelogs/v21.9.1.8000-prestable.md b/docs/changelogs/v21.9.1.8000-prestable.md new file mode 100644 index 00000000000..cee357658d2 --- /dev/null +++ b/docs/changelogs/v21.9.1.8000-prestable.md @@ -0,0 +1,190 @@ +### ClickHouse release v21.9.1.8000-prestable FIXME as compared to v21.8.1.7409-prestable + +#### Backward Incompatible Change +* Fix the issue that in case of some sophisticated query with column aliases identical to the names of expressions, bad cast may happen. 
This fixes [#25447](https://github.com/ClickHouse/ClickHouse/issues/25447). This fixes [#26914](https://github.com/ClickHouse/ClickHouse/issues/26914). This fix may introduce backward incompatibility: if there are different expressions with identical names, exception will be thrown. It may break some rare cases when `enable_optimize_predicate_expression` is set. [#26639](https://github.com/ClickHouse/ClickHouse/pull/26639) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Under clickhouse-local, always treat local addresses with a port as remote. [#26736](https://github.com/ClickHouse/ClickHouse/pull/26736) ([Raúl Marín](https://github.com/Algunenano)). +* Do not allow to apply parametric aggregate function with `-Merge` combinator to aggregate function state if state was produced by aggregate function with different parameters. For example, state of `fooState(42)(x)` cannot be finalized with `fooMerge(s)` or `fooMerge(123)(s)`, parameters must be specified explicitly like `fooMerge(42)(s)` and must be equal. It does not affect some special aggregate functions like `quantile` and `sequence*` that use parameters for finalization only. [#26847](https://github.com/ClickHouse/ClickHouse/pull/26847) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not output trailing zeros in text representation of `Decimal` types. Example: `1.23` will be printed instead of `1.230000` for decimal with scale 6. This closes [#15794](https://github.com/ClickHouse/ClickHouse/issues/15794). It may introduce slight incompatibility if your applications somehow relied on the trailing zeros. Serialization in output formats can be controlled with the setting `output_format_decimal_trailing_zeros`. Implementation of `toString` and casting to String is changed unconditionally. [#27680](https://github.com/ClickHouse/ClickHouse/pull/27680) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+ +#### New Feature +* Implement window function `nth_value(expr, N)` that returns the value of the Nth row of the window frame. [#26334](https://github.com/ClickHouse/ClickHouse/pull/26334) ([Zuo, RuoYu](https://github.com/ryzuo)). +* - Add `REPLACE GRANT` feature. [#26384](https://github.com/ClickHouse/ClickHouse/pull/26384) ([Caspian](https://github.com/Cas-pian)). +* Functions that return (initial_)query_id of the current query. This closes [#23682](https://github.com/ClickHouse/ClickHouse/issues/23682). [#26410](https://github.com/ClickHouse/ClickHouse/pull/26410) ([Alexey Boykov](https://github.com/mathalex)). +* Introduce syntax for here documents. Example `SELECT $doc$VALUE$doc$`. [#26671](https://github.com/ClickHouse/ClickHouse/pull/26671) ([Maksim Kita](https://github.com/kitaisreal)). +* New functions `currentProfiles()`, `enabledProfiles()`, `defaultProfiles()`. [#26714](https://github.com/ClickHouse/ClickHouse/pull/26714) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add new functions `currentRoles()`, `enabledRoles()`, `defaultRoles()`. [#26780](https://github.com/ClickHouse/ClickHouse/pull/26780) ([Vitaly Baranov](https://github.com/vitlibar)). +* Supported cluster macros inside table functions 'cluster' and 'clusterAllReplicas'. [#26913](https://github.com/ClickHouse/ClickHouse/pull/26913) ([Vadim Volodin](https://github.com/PolyProgrammist)). +* Added support for custom query for MySQL, PostgreSQL, ClickHouse, JDBC, Cassandra dictionary source. Closes [#1270](https://github.com/ClickHouse/ClickHouse/issues/1270). [#26995](https://github.com/ClickHouse/ClickHouse/pull/26995) ([Maksim Kita](https://github.com/kitaisreal)). +* add column default_database to system.users. [#27054](https://github.com/ClickHouse/ClickHouse/pull/27054) ([kevin wan](https://github.com/MaxWk)). +* Added `bitmapSubsetOffsetLimit(bitmap, offset, cardinality_limit)` function. It creates a subset of bitmap limit the results to `cardinality_limit` with offset of `offset`. 
[#27234](https://github.com/ClickHouse/ClickHouse/pull/27234) ([DHBin](https://github.com/DHBin)). +* Add support for `bzip2` compression method for import/export. Closes [#22428](https://github.com/ClickHouse/ClickHouse/issues/22428). [#27377](https://github.com/ClickHouse/ClickHouse/pull/27377) ([Nikolay Degterinsky](https://github.com/evillique)). +* - Add replicated storage of user, roles, row policies, quotas and settings profiles through ZooKeeper (experimental). [#27426](https://github.com/ClickHouse/ClickHouse/pull/27426) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Add "tupleToNameValuePairs", a function that turns a named tuple into an array of pairs. [#27505](https://github.com/ClickHouse/ClickHouse/pull/27505) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)). +* Enable using constants from with and select in aggregate function parameters. Close [#10945](https://github.com/ClickHouse/ClickHouse/issues/10945). [#27531](https://github.com/ClickHouse/ClickHouse/pull/27531) ([abel-cheng](https://github.com/abel-cheng)). +* Added ComplexKeyRangeHashed dictionary. Closes [#22029](https://github.com/ClickHouse/ClickHouse/issues/22029). [#27629](https://github.com/ClickHouse/ClickHouse/pull/27629) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Compile aggregate functions `groupBitOr`, `groupBitAnd`, `groupBitXor`. [#26161](https://github.com/ClickHouse/ClickHouse/pull/26161) ([Maksim Kita](https://github.com/kitaisreal)). +* Compile columns with `Enum` types. [#26237](https://github.com/ClickHouse/ClickHouse/pull/26237) ([Maksim Kita](https://github.com/kitaisreal)). +* - Vectorize the SUM of Nullable integer types with native representation ([David Manzanares](https://github.com/davidmanzanares), [Raúl Marín](https://github.com/Algunenano)). [#26248](https://github.com/ClickHouse/ClickHouse/pull/26248) ([Raúl Marín](https://github.com/Algunenano)). 
+* Don't build sets for indices when analyzing a query. [#26365](https://github.com/ClickHouse/ClickHouse/pull/26365) ([Raúl Marín](https://github.com/Algunenano)). +* Improve latency of short queries, that require reading from tables with large number of columns. [#26371](https://github.com/ClickHouse/ClickHouse/pull/26371) ([Anton Popov](https://github.com/CurtizJ)). +* Share file descriptors in concurrent reads of the same files. There is no noticeable performance difference on Linux. But the number of opened files will be significantly (10..100 times) lower on typical servers and it makes operations easier. See [#26214](https://github.com/ClickHouse/ClickHouse/issues/26214). [#26768](https://github.com/ClickHouse/ClickHouse/pull/26768) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Specialize date time related comparison to achieve better performance. This fixes [#27083](https://github.com/ClickHouse/ClickHouse/issues/27083) . [#27122](https://github.com/ClickHouse/ClickHouse/pull/27122) ([Amos Bird](https://github.com/amosbird)). +* Improve the performance of fast queries when `max_execution_time=0` by reducing the number of `clock_gettime` system calls. [#27325](https://github.com/ClickHouse/ClickHouse/pull/27325) ([filimonov](https://github.com/filimonov)). +* Less number of `clock_gettime` syscalls that may lead to performance improvement for some types of fast queries. [#27492](https://github.com/ClickHouse/ClickHouse/pull/27492) ([filimonov](https://github.com/filimonov)). + +#### Improvement +* Add error id (like `BAD_ARGUMENTS`) to exception messages. This closes [#25862](https://github.com/ClickHouse/ClickHouse/issues/25862). [#26172](https://github.com/ClickHouse/ClickHouse/pull/26172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove GLOBAL keyword for IN when scalar function is passed. In previous versions, if user specified `GLOBAL IN f(x)` exception was thrown. 
[#26217](https://github.com/ClickHouse/ClickHouse/pull/26217) ([Amos Bird](https://github.com/amosbird)). +* Apply aggressive IN index analysis for projections so that better projection candidate can be selected. [#26218](https://github.com/ClickHouse/ClickHouse/pull/26218) ([Amos Bird](https://github.com/amosbird)). +* convert timestamp and timestamptz data types to DateTime64 in postgres engine. [#26234](https://github.com/ClickHouse/ClickHouse/pull/26234) ([jasine](https://github.com/jasine)). +* Check for non-deterministic functions in keys, including constant expressions like `now()`, `today()`. This closes [#25875](https://github.com/ClickHouse/ClickHouse/issues/25875). This closes [#11333](https://github.com/ClickHouse/ClickHouse/issues/11333). [#26235](https://github.com/ClickHouse/ClickHouse/pull/26235) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't throw exception when querying `system.detached_parts` table if there is custom disk configuration and `detached` directory does not exist on some disks. This closes [#26078](https://github.com/ClickHouse/ClickHouse/issues/26078). [#26236](https://github.com/ClickHouse/ClickHouse/pull/26236) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add information about column sizes in `system.columns` table for `Log` and `TinyLog` tables. This closes [#9001](https://github.com/ClickHouse/ClickHouse/issues/9001). [#26241](https://github.com/ClickHouse/ClickHouse/pull/26241) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added `output_format_avro_string_column_pattern` setting to put specified String columns to Avro as string instead of default bytes. Implements [#22414](https://github.com/ClickHouse/ClickHouse/issues/22414). [#26245](https://github.com/ClickHouse/ClickHouse/pull/26245) ([Ilya Golshtein](https://github.com/ilejn)). +* - Add `system.warnings` table to collect warnings about server configuration. 
[#26246](https://github.com/ClickHouse/ClickHouse/pull/26246) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Check hash function at table creation, not at sampling. Add a setting in MergeTreeSettings: if someone created a table with an incorrect sampling column but sampling is never used, disable this setting to start the server without an exception. [#26256](https://github.com/ClickHouse/ClickHouse/pull/26256) ([zhaoyu](https://github.com/zxc111)). +* Make `toTimeZone` monotonic when the time zone is a constant value, to support partition pruning in queries that use it. [#26261](https://github.com/ClickHouse/ClickHouse/pull/26261) ([huangzhaowei](https://github.com/SaintBacchus)). +* When a client connects to the server, it receives information about all warnings that were already collected by the server. (It can be disabled by using option `--no-warnings`). [#26282](https://github.com/ClickHouse/ClickHouse/pull/26282) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add a setting `function_range_max_elements_in_block` to tune the safety threshold for data volume generated by function `range`. This closes [#26303](https://github.com/ClickHouse/ClickHouse/issues/26303). [#26305](https://github.com/ClickHouse/ClickHouse/pull/26305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Control the execution period of clearing old temporary directories by a parameter with a default value. [#26212](https://github.com/ClickHouse/ClickHouse/issues/26212). [#26313](https://github.com/ClickHouse/ClickHouse/pull/26313) ([fastio](https://github.com/fastio)). +* Allow to reuse connections of shards among different clusters. It also avoids creating new connections when using `cluster` table function. [#26318](https://github.com/ClickHouse/ClickHouse/pull/26318) ([Amos Bird](https://github.com/amosbird)). +* Add events to profile calls to sleep / sleepEachRow. [#26320](https://github.com/ClickHouse/ClickHouse/pull/26320) ([Raúl Marín](https://github.com/Algunenano)). 
+* Save server address in history URLs in web UI if it differs from the origin of web UI. This closes [#26044](https://github.com/ClickHouse/ClickHouse/issues/26044). [#26322](https://github.com/ClickHouse/ClickHouse/pull/26322) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to set Distributed directory monitor settings via CREATE TABLE (i.e. `CREATE TABLE dist (key Int) Engine=Distributed(cluster, db, table) SETTINGS monitor_batch_inserts=1` and similar). [#26336](https://github.com/ClickHouse/ClickHouse/pull/26336) ([Azat Khuzhin](https://github.com/azat)). +* Fix behaviour with non-existing host in user allowed host list. [#26368](https://github.com/ClickHouse/ClickHouse/pull/26368) ([ianton-ru](https://github.com/ianton-ru)). +* Added comments for the code written in https://github.com/ClickHouse/ClickHouse/pull/24206; the code has been improved in several places. [#26377](https://github.com/ClickHouse/ClickHouse/pull/26377) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable `use_hedged_requests` setting that allows to mitigate tail latencies on large clusters. [#26380](https://github.com/ClickHouse/ClickHouse/pull/26380) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Updated protobuf to 3.17.3. Changelogs are available on https://github.com/protocolbuffers/protobuf/releases. [#26424](https://github.com/ClickHouse/ClickHouse/pull/26424) ([Ilya Yatsishin](https://github.com/qoega)). +* After https://github.com/ClickHouse/ClickHouse/pull/26377. Encryption algorithm now should be specified explicitly if it's not default (`aes_128_ctr`):. [#26465](https://github.com/ClickHouse/ClickHouse/pull/26465) ([Vitaly Baranov](https://github.com/vitlibar)). +* Apply `LIMIT` on the shards for queries like `SELECT * FROM dist ORDER BY key LIMIT 10` w/ `distributed_push_down_limit=1`. Avoid running `Distinct`/`LIMIT BY` steps for queries like `SELECT DISTINCT shading_key FROM dist ORDER BY key`. 
Now `distributed_push_down_limit` is respected by `optimize_distributed_group_by_sharding_key` optimization. [#26466](https://github.com/ClickHouse/ClickHouse/pull/26466) ([Azat Khuzhin](https://github.com/azat)). +* Set client query kind for mysql and postgresql handler. [#26498](https://github.com/ClickHouse/ClickHouse/pull/26498) ([anneji-dev](https://github.com/anneji-dev)). +* Executable dictionaries (ExecutableDictionarySource, ExecutablePoolDictionarySource) enable creation with DDL query using clickhouse-local. Closes [#22355](https://github.com/ClickHouse/ClickHouse/issues/22355). [#26510](https://github.com/ClickHouse/ClickHouse/pull/26510) ([Maksim Kita](https://github.com/kitaisreal)). +* Add round-robin support for clickhouse-benchmark (it does not differ from the regular multi host/port run except for statistics report). [#26607](https://github.com/ClickHouse/ClickHouse/pull/26607) ([Azat Khuzhin](https://github.com/azat)). +* Improve usage of the Kafka engine on high-performance machines; it can also reduce the query node workload. [#26642](https://github.com/ClickHouse/ClickHouse/pull/26642) ([feihengye](https://github.com/feihengye)). +* Avoid hanging clickhouse-benchmark if connection fails (i.e. on EMFILE). [#26656](https://github.com/ClickHouse/ClickHouse/pull/26656) ([Azat Khuzhin](https://github.com/azat)). +* Fix excessive (x2) connect attempts with skip_unavailable_shards. [#26658](https://github.com/ClickHouse/ClickHouse/pull/26658) ([Azat Khuzhin](https://github.com/azat)). +* `mapPopulateSeries` function supports `Map` type. [#26663](https://github.com/ClickHouse/ClickHouse/pull/26663) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Improve handling of KILL QUERY requests. [#26675](https://github.com/ClickHouse/ClickHouse/pull/26675) ([Raúl Marín](https://github.com/Algunenano)). +* SET PROFILE now applies constraints too if they're set for a passed profile. 
[#26730](https://github.com/ClickHouse/ClickHouse/pull/26730) ([Vitaly Baranov](https://github.com/vitlibar)). +* Support multiple keys for encrypted disk. Display error message if the key is probably wrong. (see https://github.com/ClickHouse/ClickHouse/pull/26465#issuecomment-882015970). [#26733](https://github.com/ClickHouse/ClickHouse/pull/26733) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove unnecessary exception thrown. [#26740](https://github.com/ClickHouse/ClickHouse/pull/26740) ([Caspian](https://github.com/Cas-pian)). +* Watchdog is disabled in docker by default. Fix for not handling ctrl+c. [#26757](https://github.com/ClickHouse/ClickHouse/pull/26757) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Changing default roles affects new sessions only. [#26759](https://github.com/ClickHouse/ClickHouse/pull/26759) ([Vitaly Baranov](https://github.com/vitlibar)). +* Less verbose internal RocksDB logs. This closes [#26252](https://github.com/ClickHouse/ClickHouse/issues/26252). [#26789](https://github.com/ClickHouse/ClickHouse/pull/26789) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Expose rocksdb statistics via system.rocksdb table. Read rocksdb options from ClickHouse config (`rocksdb`/`rocksdb_TABLE` keys). [#26821](https://github.com/ClickHouse/ClickHouse/pull/26821) ([Azat Khuzhin](https://github.com/azat)). +* Updated extractAllGroupsHorizontal - upper limit on the number of matches per row can be set via optional third argument. ... [#26961](https://github.com/ClickHouse/ClickHouse/pull/26961) ([Vasily Nemkov](https://github.com/Enmk)). +* Now functions can be shard-level constants, which means if it's executed in the context of some distributed table, it generates a normal column, otherwise it produces a constant value. Notable functions are: `hostName()`, `tcpPort()`, `version()`, `buildId()`, `uptime()`, etc. [#27020](https://github.com/ClickHouse/ClickHouse/pull/27020) ([Amos Bird](https://github.com/amosbird)). 
+* * Merge join correctly handles empty set in the right. [#27078](https://github.com/ClickHouse/ClickHouse/pull/27078) ([Vladimir C](https://github.com/vdimir)). +* Improve compatibility with non-whole-minute timezone offsets. [#27080](https://github.com/ClickHouse/ClickHouse/pull/27080) ([Raúl Marín](https://github.com/Algunenano)). +* Enable distributed_push_down_limit by default. [#27104](https://github.com/ClickHouse/ClickHouse/pull/27104) ([Azat Khuzhin](https://github.com/azat)). +* Improved the existence condition judgment and empty string node judgment when clickhouse-keeper creates znode. [#27125](https://github.com/ClickHouse/ClickHouse/pull/27125) ([小路](https://github.com/nicelulu)). +* Add compression for `INTO OUTFILE` that automatically choose compression algorithm. Closes [#3473](https://github.com/ClickHouse/ClickHouse/issues/3473). [#27134](https://github.com/ClickHouse/ClickHouse/pull/27134) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* add a new metric called MaxPushedDDLEntryID which is the maximum ddl entry id that current node push to zookeeper. [#27174](https://github.com/ClickHouse/ClickHouse/pull/27174) ([Fuwang Hu](https://github.com/fuwhu)). +* Allow to pass query settings via server URI in Web UI. [#27177](https://github.com/ClickHouse/ClickHouse/pull/27177) ([kolsys](https://github.com/kolsys)). +* Added columns `replica_is_active` that maps replica name to is replica active status to table `system.replicas`. Closes [#27138](https://github.com/ClickHouse/ClickHouse/issues/27138). [#27180](https://github.com/ClickHouse/ClickHouse/pull/27180) ([Maksim Kita](https://github.com/kitaisreal)). +* Try recording `query_kind` even when query fails to start. [#27182](https://github.com/ClickHouse/ClickHouse/pull/27182) ([Amos Bird](https://github.com/amosbird)). +* Mark window functions as ready for general use. Remove the `allow_experimental_window_functions` setting. 
[#27184](https://github.com/ClickHouse/ClickHouse/pull/27184) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Memory client in client. [#27191](https://github.com/ClickHouse/ClickHouse/pull/27191) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Support schema for postgres database engine. Closes [#27166](https://github.com/ClickHouse/ClickHouse/issues/27166). [#27198](https://github.com/ClickHouse/ClickHouse/pull/27198) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Split global mutex into individual regexp construction. This helps avoid huge regexp construction blocking other related threads. Not sure how to proper test the improvement. [#27211](https://github.com/ClickHouse/ClickHouse/pull/27211) ([Amos Bird](https://github.com/amosbird)). +* Add 10 seconds cache for S3 proxy resolver. [#27216](https://github.com/ClickHouse/ClickHouse/pull/27216) ([ianton-ru](https://github.com/ianton-ru)). +* Add new index data skipping minmax index format for proper Nullable support. [#27250](https://github.com/ClickHouse/ClickHouse/pull/27250) ([Azat Khuzhin](https://github.com/azat)). +* Memory consumed by bitmap aggregate functions now is taken into account for memory limits. This closes [#26555](https://github.com/ClickHouse/ClickHouse/issues/26555). [#27252](https://github.com/ClickHouse/ClickHouse/pull/27252) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add two settings `max_hyperscan_regexp_length` and `max_hyperscan_regexp_total_length` to prevent huge regexp being used in hyperscan related functions, such as `multiMatchAny`. [#27378](https://github.com/ClickHouse/ClickHouse/pull/27378) ([Amos Bird](https://github.com/amosbird)). +* Add setting `log_formatted_queries` to log additional formatted query into `system.query_log`. It's useful for normalized query analysis because functions like `normalizeQuery` and `normalizeQueryKeepNames` don't parse/format queries in order to achieve better performance. 
[#27380](https://github.com/ClickHouse/ClickHouse/pull/27380) ([Amos Bird](https://github.com/amosbird)). +* Add Cast function for internal usage, which will not preserve type nullability, but non-internal cast will preserve according to setting cast_keep_nullable. Closes [#12636](https://github.com/ClickHouse/ClickHouse/issues/12636). [#27382](https://github.com/ClickHouse/ClickHouse/pull/27382) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Send response with error message if HTTP port is not set and user tries to send HTTP request to TCP port. [#27385](https://github.com/ClickHouse/ClickHouse/pull/27385) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)). +* Use bytes instead of strings for binary data in the GRPC protocol. [#27431](https://github.com/ClickHouse/ClickHouse/pull/27431) ([Vitaly Baranov](https://github.com/vitlibar)). +* Log client IP address if authentication fails. [#27514](https://github.com/ClickHouse/ClickHouse/pull/27514) ([Misko Lee](https://github.com/imiskolee)). +* Disable arrayJoin on partition expressions. [#27648](https://github.com/ClickHouse/ClickHouse/pull/27648) ([Raúl Marín](https://github.com/Algunenano)). +* - Add `FROM INFILE` command. [#27655](https://github.com/ClickHouse/ClickHouse/pull/27655) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Enables query parameters to be passed in the body of http requests. [#27706](https://github.com/ClickHouse/ClickHouse/pull/27706) ([Hermano Lustosa](https://github.com/hllustosa)). +* Remove duplicate index analysis and avoid possible invalid limit checks during projection analysis. [#27742](https://github.com/ClickHouse/ClickHouse/pull/27742) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix +* Fix potential crash if more than one `untuple` expression is used. [#26179](https://github.com/ClickHouse/ClickHouse/pull/26179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove excessive newline in `thread_name` column in `system.stack_trace` table. This fixes [#24124](https://github.com/ClickHouse/ClickHouse/issues/24124). [#26210](https://github.com/ClickHouse/ClickHouse/pull/26210) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix logical error on join with totals, close [#26017](https://github.com/ClickHouse/ClickHouse/issues/26017). [#26250](https://github.com/ClickHouse/ClickHouse/pull/26250) ([Vladimir C](https://github.com/vdimir)). +* Fix zstd decompression in case there are escape sequences at the end of internal buffer. Closes [#26013](https://github.com/ClickHouse/ClickHouse/issues/26013). [#26314](https://github.com/ClickHouse/ClickHouse/pull/26314) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed rare bug in lost replica recovery that may cause replicas to diverge. [#26321](https://github.com/ClickHouse/ClickHouse/pull/26321) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `optimize_distributed_group_by_sharding_key` for multiple columns (leads to incorrect result w/ `optimize_skip_unused_shards=1`/`allow_nondeterministic_optimize_skip_unused_shards=1` and multiple columns in sharding key expression). [#26353](https://github.com/ClickHouse/ClickHouse/pull/26353) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible crash when login as dropped user. This PR fixes [#26073](https://github.com/ClickHouse/ClickHouse/issues/26073). [#26363](https://github.com/ClickHouse/ClickHouse/pull/26363) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix infinite non joined block stream in `partial_merge_join` close [#26325](https://github.com/ClickHouse/ClickHouse/issues/26325). [#26374](https://github.com/ClickHouse/ClickHouse/pull/26374) ([Vladimir C](https://github.com/vdimir)). +* Now, scalar subquery always returns `Nullable` result if it's type can be `Nullable`. It is needed because in case of empty subquery it's result should be `Null`. 
Previously, it was possible to get error about incompatible types (type deduction does not execute scalar subquery, and it could use not-nullable type). Scalar subquery with empty result which can't be converted to `Nullable` (like `Array` or `Tuple`) now throws error. Fixes [#25411](https://github.com/ClickHouse/ClickHouse/issues/25411). [#26423](https://github.com/ClickHouse/ClickHouse/pull/26423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix some fuzzed msan crash. Fixes [#22517](https://github.com/ClickHouse/ClickHouse/issues/22517). [#26428](https://github.com/ClickHouse/ClickHouse/pull/26428) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix broken name resolution after rewriting column aliases. This fixes [#26432](https://github.com/ClickHouse/ClickHouse/issues/26432). [#26475](https://github.com/ClickHouse/ClickHouse/pull/26475) ([Amos Bird](https://github.com/amosbird)). +* Fix issues with `CREATE DICTIONARY` query if dictionary name or database name was quoted. Closes [#26491](https://github.com/ClickHouse/ClickHouse/issues/26491). [#26508](https://github.com/ClickHouse/ClickHouse/pull/26508) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash in rabbitmq shutdown in case rabbitmq setup was not started. Closes [#26504](https://github.com/ClickHouse/ClickHouse/issues/26504). [#26529](https://github.com/ClickHouse/ClickHouse/pull/26529) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update `chown` cmd check in clickhouse-server docker entrypoint. It fixes the bug that cluster pod restart failed (or timeout) on kubernetes. [#26545](https://github.com/ClickHouse/ClickHouse/pull/26545) ([Ky Li](https://github.com/Kylinrix)). +* Fix incorrect function names of groupBitmapAnd/Or/Xor. This fixes. [#26557](https://github.com/ClickHouse/ClickHouse/pull/26557) ([Amos Bird](https://github.com/amosbird)). +* Fix history file conversion if file is empty. 
[#26589](https://github.com/ClickHouse/ClickHouse/pull/26589) ([Azat Khuzhin](https://github.com/azat)). +* Fix potential nullptr dereference in window functions. This fixes [#25276](https://github.com/ClickHouse/ClickHouse/issues/25276). [#26668](https://github.com/ClickHouse/ClickHouse/pull/26668) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* ParallelFormattingOutputFormat: Use mutex to handle the join to the collector_thread (https://github.com/ClickHouse/ClickHouse/issues/26694). [#26703](https://github.com/ClickHouse/ClickHouse/pull/26703) ([Raúl Marín](https://github.com/Algunenano)). +* Sometimes SET ROLE could work incorrectly, this PR fixes that. [#26707](https://github.com/ClickHouse/ClickHouse/pull/26707) ([Vitaly Baranov](https://github.com/vitlibar)). +* Do not remove data on ReplicatedMergeTree table shutdown to avoid creating data to metadata inconsistency. [#26716](https://github.com/ClickHouse/ClickHouse/pull/26716) ([nvartolomei](https://github.com/nvartolomei)). +* Add `event_time_microseconds` value for `REMOVE_PART` in `system.part_log`. In previous versions it was not set. [#26720](https://github.com/ClickHouse/ClickHouse/pull/26720) ([Azat Khuzhin](https://github.com/azat)). +* Aggregate function parameters might be lost when applying some combinators causing exceptions like `Conversion from AggregateFunction(topKArray, Array(String)) to AggregateFunction(topKArray(10), Array(String)) is not supported`. It's fixed. Fixes [#26196](https://github.com/ClickHouse/ClickHouse/issues/26196) and [#26433](https://github.com/ClickHouse/ClickHouse/issues/26433). [#26814](https://github.com/ClickHouse/ClickHouse/pull/26814) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix library-bridge ids load. [#26834](https://github.com/ClickHouse/ClickHouse/pull/26834) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix error `Missing columns: 'xxx'` when `DEFAULT` column references other non materialized column without `DEFAULT` expression. Fixes [#26591](https://github.com/ClickHouse/ClickHouse/issues/26591). [#26900](https://github.com/ClickHouse/ClickHouse/pull/26900) ([alesapin](https://github.com/alesapin)). +* Fix reading of custom TLDs (stops processing with lower buffer or bigger file). [#26948](https://github.com/ClickHouse/ClickHouse/pull/26948) ([Azat Khuzhin](https://github.com/azat)). +* Fix "Unknown column name" error with multiple JOINs in some cases, close [#26899](https://github.com/ClickHouse/ClickHouse/issues/26899). [#26957](https://github.com/ClickHouse/ClickHouse/pull/26957) ([Vladimir C](https://github.com/vdimir)). +* Now partition ID in queries like `ALTER TABLE ... PARTITION ID xxx` validates for correctness. Fixes [#25718](https://github.com/ClickHouse/ClickHouse/issues/25718). [#26963](https://github.com/ClickHouse/ClickHouse/pull/26963) ([alesapin](https://github.com/alesapin)). +* [RFC] Fix possible mutation stack due to race with DROP_RANGE. [#27002](https://github.com/ClickHouse/ClickHouse/pull/27002) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache` configuration parsing. Options `allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds` were not parsed for dictionaries with non `cache` type. [#27032](https://github.com/ClickHouse/ClickHouse/pull/27032) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix synchronization in GRPCServer This PR fixes [#27024](https://github.com/ClickHouse/ClickHouse/issues/27024). [#27064](https://github.com/ClickHouse/ClickHouse/pull/27064) ([Vitaly Baranov](https://github.com/vitlibar)). +* - Fix uninitialized memory in functions `multiSearch*` with empty array, close [#27169](https://github.com/ClickHouse/ClickHouse/issues/27169). 
[#27181](https://github.com/ClickHouse/ClickHouse/pull/27181) ([Vladimir C](https://github.com/vdimir)). +* In rare cases `system.detached_parts` table might contain incorrect information for some parts, it's fixed. Fixes [#27114](https://github.com/ClickHouse/ClickHouse/issues/27114). [#27183](https://github.com/ClickHouse/ClickHouse/pull/27183) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix on-disk format breakage for secondary indices over Nullable column (no stable release had been affected). [#27197](https://github.com/ClickHouse/ClickHouse/pull/27197) ([Azat Khuzhin](https://github.com/azat)). +* Fix column structure in merge join, close [#27091](https://github.com/ClickHouse/ClickHouse/issues/27091). [#27217](https://github.com/ClickHouse/ClickHouse/pull/27217) ([Vladimir C](https://github.com/vdimir)). +* In case of ambiguity, lambda functions prefer its arguments to other aliases or identifiers. [#27235](https://github.com/ClickHouse/ClickHouse/pull/27235) ([Raúl Marín](https://github.com/Algunenano)). +* Fix mutation stuck on invalid partitions in non-replicated MergeTree. [#27248](https://github.com/ClickHouse/ClickHouse/pull/27248) ([Azat Khuzhin](https://github.com/azat)). +* Fix `distributed_group_by_no_merge=2`+`distributed_push_down_limit=1` or `optimize_distributed_group_by_sharding_key=1` with `LIMIT BY` and `LIMIT OFFSET`. [#27249](https://github.com/ClickHouse/ClickHouse/pull/27249) ([Azat Khuzhin](https://github.com/azat)). +* Fix errors like `Expected ColumnLowCardinality, gotUInt8` or `Bad cast from type DB::ColumnVector to DB::ColumnLowCardinality` for some queries with `LowCardinality` in `PREWHERE`. Fixes [#23515](https://github.com/ClickHouse/ClickHouse/issues/23515). [#27298](https://github.com/ClickHouse/ClickHouse/pull/27298) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Cannot find column` error for queries with sampling. 
Was introduced in [#24574](https://github.com/ClickHouse/ClickHouse/issues/24574). Fixes [#26522](https://github.com/ClickHouse/ClickHouse/issues/26522). [#27301](https://github.com/ClickHouse/ClickHouse/pull/27301) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Mysql protocol when using parallel formats (CSV / TSV). [#27326](https://github.com/ClickHouse/ClickHouse/pull/27326) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed incorrect validation of partition id for MergeTree tables that created with old syntax. [#27328](https://github.com/ClickHouse/ClickHouse/pull/27328) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect result for query with row-level security, prewhere and LowCardinality filter. Fixes [#27179](https://github.com/ClickHouse/ClickHouse/issues/27179). [#27329](https://github.com/ClickHouse/ClickHouse/pull/27329) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* /proc/info contains metrics like. [#27361](https://github.com/ClickHouse/ClickHouse/pull/27361) ([Mike Kot](https://github.com/myrrc)). +* Fix distributed queries with zero shards and aggregation. [#27427](https://github.com/ClickHouse/ClickHouse/pull/27427) ([Azat Khuzhin](https://github.com/azat)). +* fix metric BackgroundMessageBrokerSchedulePoolTask, maybe mistyped。. [#27452](https://github.com/ClickHouse/ClickHouse/pull/27452) ([Ben](https://github.com/benbiti)). +* Fix crash during projection materialization when some parts contain missing columns. This fixes [#27512](https://github.com/ClickHouse/ClickHouse/issues/27512). [#27528](https://github.com/ClickHouse/ClickHouse/pull/27528) ([Amos Bird](https://github.com/amosbird)). +* Fixed underflow of the time value when constructing it from components. Closes [#27193](https://github.com/ClickHouse/ClickHouse/issues/27193). [#27605](https://github.com/ClickHouse/ClickHouse/pull/27605) ([Vasily Nemkov](https://github.com/Enmk)). 
+* After setting `max_memory_usage*` to non-zero value it was not possible to reset it back to 0 (unlimited). It's fixed. [#27638](https://github.com/ClickHouse/ClickHouse/pull/27638) ([Alexander Tokmakov](https://github.com/tavplubix)). +* - Fix bug with aliased column in `Distributed` table. [#27652](https://github.com/ClickHouse/ClickHouse/pull/27652) ([Vladimir C](https://github.com/vdimir)). +* Fixed another case of `Unexpected merged part ... intersecting drop range ...` error. [#27656](https://github.com/ClickHouse/ClickHouse/pull/27656) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix postgresql table function resulting in non-closing connections. Closes [#26088](https://github.com/ClickHouse/ClickHouse/issues/26088). [#27662](https://github.com/ClickHouse/ClickHouse/pull/27662) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bad type cast when functions like `arrayHas` are applied to arrays of LowCardinality of Nullable of different non-numeric types like `DateTime` and `DateTime64`. In previous versions bad cast occurs. In new version it will lead to exception. This closes [#26330](https://github.com/ClickHouse/ClickHouse/issues/26330). [#27682](https://github.com/ClickHouse/ClickHouse/pull/27682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix column filtering with union distinct in subquery. Closes [#27578](https://github.com/ClickHouse/ClickHouse/issues/27578). [#27689](https://github.com/ClickHouse/ClickHouse/pull/27689) ([Kseniia Sumarokova](https://github.com/kssenii)). +* After https://github.com/ClickHouse/ClickHouse/pull/26384. To execute `GRANT WITH REPLACE OPTION` now the current user should have `GRANT OPTION` for access rights it's going to grant AND for access rights it's going to revoke. [#27701](https://github.com/ClickHouse/ClickHouse/pull/27701) ([Vitaly Baranov](https://github.com/vitlibar)). +* After https://github.com/ClickHouse/ClickHouse/pull/25687. 
Add backquotes for the default database shown in CREATE USER. [#27702](https://github.com/ClickHouse/ClickHouse/pull/27702) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove duplicated source files in CMakeLists.txt in arrow-cmake. [#27736](https://github.com/ClickHouse/ClickHouse/pull/27736) ([李扬](https://github.com/taiyang-li)). +* Fix possible crash when asynchronous connection draining is enabled and hedged connection is disabled. [#27774](https://github.com/ClickHouse/ClickHouse/pull/27774) ([Amos Bird](https://github.com/amosbird)). +* Prevent crashes for some formats when NULL (tombstone) message was coming from Kafka. Closes [#19255](https://github.com/ClickHouse/ClickHouse/issues/19255). [#27794](https://github.com/ClickHouse/ClickHouse/pull/27794) ([filimonov](https://github.com/filimonov)). +* Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). +* Fix a couple of bugs that may cause replicas to diverge. [#27808](https://github.com/ClickHouse/ClickHouse/pull/27808) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Build/Testing/Packaging Improvement +* Update RocksDB to 2021-07-16 master. [#26411](https://github.com/ClickHouse/ClickHouse/pull/26411) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `clickhouse-test` supports SQL tests with [Jinja2](https://jinja.palletsprojects.com/en/3.0.x/templates/#synopsis) templates. [#26579](https://github.com/ClickHouse/ClickHouse/pull/26579) ([Vladimir C](https://github.com/vdimir)). +* Fix /clickhouse/window functions/tests/non distributed/errors/error window function in join. [#26744](https://github.com/ClickHouse/ClickHouse/pull/26744) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enabling RBAC TestFlows tests and crossing out new fails. 
[#26747](https://github.com/ClickHouse/ClickHouse/pull/26747) ([vzakaznikov](https://github.com/vzakaznikov)). +* Tests: Fix CLICKHOUSE_CLIENT_SECURE with the default config. [#26901](https://github.com/ClickHouse/ClickHouse/pull/26901) ([Raúl Marín](https://github.com/Algunenano)). +* Fix linking of auxiliary programs when using dynamic libraries. [#26958](https://github.com/ClickHouse/ClickHouse/pull/26958) ([Raúl Marín](https://github.com/Algunenano)). +* Add CMake options to build with or without specific CPU instruction set. This is for [#17469](https://github.com/ClickHouse/ClickHouse/issues/17469) and [#27509](https://github.com/ClickHouse/ClickHouse/issues/27509). [#27508](https://github.com/ClickHouse/ClickHouse/pull/27508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for build with `clang-13`. This closes [#27705](https://github.com/ClickHouse/ClickHouse/issues/27705). [#27714](https://github.com/ClickHouse/ClickHouse/pull/27714) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve support for build with `clang-13`. [#27777](https://github.com/ClickHouse/ClickHouse/pull/27777) ([Sergei Semin](https://github.com/syominsergey)). + +#### Other +* Rename `MaterializeMySQL` to `MaterializedMySQL`. [#26822](https://github.com/ClickHouse/ClickHouse/pull/26822) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Modify code comments'. [#26265](https://github.com/ClickHouse/ClickHouse/pull/26265) ([xiedeyantu](https://github.com/xiedeyantu)). +* NO CL ENTRY: 'Revert "Datatype Date32, support range 1925 to 2283"'. [#26352](https://github.com/ClickHouse/ClickHouse/pull/26352) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Fix CURR_DATABASE empty for 01034_move_partition_from_table_zookeeper.sh'. [#27164](https://github.com/ClickHouse/ClickHouse/pull/27164) ([小路](https://github.com/nicelulu)). 
+* NO CL ENTRY: 'DOCSUP-12413: macros support in functions cluster and clusterAllReplicas'. [#27759](https://github.com/ClickHouse/ClickHouse/pull/27759) ([olgarev](https://github.com/olgarev)). +* NO CL ENTRY: 'Revert "less sys calls #2: make vdso work again"'. [#27829](https://github.com/ClickHouse/ClickHouse/pull/27829) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Do not miss exceptions from the ThreadPool"'. [#27844](https://github.com/ClickHouse/ClickHouse/pull/27844) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v21.9.2.17-stable.md b/docs/changelogs/v21.9.2.17-stable.md new file mode 100644 index 00000000000..3f132b983c0 --- /dev/null +++ b/docs/changelogs/v21.9.2.17-stable.md @@ -0,0 +1,45 @@ +### ClickHouse release v21.9.2.17-stable FIXME as compared to v21.9.1.8000-prestable + +#### Improvement +* Backported in [#27894](https://github.com/ClickHouse/ClickHouse/issues/27894): Allow symlinks for library dictionaty path. [#27815](https://github.com/ClickHouse/ClickHouse/pull/27815) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#28153](https://github.com/ClickHouse/ClickHouse/issues/28153): Use Multipart copy upload for large S3 objects. [#27858](https://github.com/ClickHouse/ClickHouse/pull/27858) ([ianton-ru](https://github.com/ianton-ru)). +* Backported in [#28643](https://github.com/ClickHouse/ClickHouse/issues/28643): Fix strange sessions expiration logic in Keeper. Probably it should help in CI: https://clickhouse-test-reports.s3.yandex.net/0/6bd9b82141c98dcd7796fd9d08326831095ba519/stress_test_(debug).html#fail1. [#28519](https://github.com/ClickHouse/ClickHouse/pull/28519) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix +* Backported in [#27981](https://github.com/ClickHouse/ClickHouse/issues/27981): Bugfix for windowFunnel's "strict" mode. This fixes [#27469](https://github.com/ClickHouse/ClickHouse/issues/27469). 
[#27563](https://github.com/ClickHouse/ClickHouse/pull/27563) ([achimbab](https://github.com/achimbab)). +* Backported in [#27922](https://github.com/ClickHouse/ClickHouse/issues/27922): After https://github.com/ClickHouse/ClickHouse/pull/26864. Fix shutdown of `NamedSessionStorage`: session contexts stored in `NamedSessionStorage` are now destroyed before destroying the global context. [#27875](https://github.com/ClickHouse/ClickHouse/pull/27875) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27926](https://github.com/ClickHouse/ClickHouse/issues/27926): Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#27959](https://github.com/ClickHouse/ClickHouse/issues/27959): Fix selecting with extremes from a column of the type `LowCardinality(UUID)`. [#27918](https://github.com/ClickHouse/ClickHouse/pull/27918) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#27954](https://github.com/ClickHouse/ClickHouse/issues/27954): Check cluster name before creating Distributed table, do not allow to create a table with incorrect cluster name. Fixes [#27832](https://github.com/ClickHouse/ClickHouse/issues/27832). [#27927](https://github.com/ClickHouse/ClickHouse/pull/27927) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28000](https://github.com/ClickHouse/ClickHouse/issues/28000): Fix checking access grants when executing GRANT WITH REPLACE statement with ON CLUSTER clause. This PR improves fix https://github.com/ClickHouse/ClickHouse/pull/27701. [#27983](https://github.com/ClickHouse/ClickHouse/pull/27983) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#28210](https://github.com/ClickHouse/ClickHouse/issues/28210): Fix cases, when read buffer fails with 'attempt to read after end of file'. 
Closes [#26149](https://github.com/ClickHouse/ClickHouse/issues/26149). [#28150](https://github.com/ClickHouse/ClickHouse/pull/28150) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Backported in [#28750](https://github.com/ClickHouse/ClickHouse/issues/28750): Fix transformation of disjunctions chain to `IN` (controlled by settings `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement +* Backported in [#28029](https://github.com/ClickHouse/ClickHouse/issues/28029): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#27973](https://github.com/ClickHouse/ClickHouse/issues/27973): Fix handling null value with type of Nullable(String) in function JSONExtract. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930) . This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28118](https://github.com/ClickHouse/ClickHouse/issues/28118): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). 
+* Backported in [#28182](https://github.com/ClickHouse/ClickHouse/issues/28182): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). +* Backported in [#28260](https://github.com/ClickHouse/ClickHouse/issues/28260): Multiple small fixes for projections. See detailed description in pr. [#28178](https://github.com/ClickHouse/ClickHouse/pull/28178) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28257](https://github.com/ClickHouse/ClickHouse/issues/28257): Fix incorrect behavior in `clickhouse-keeper` when list watches (`getChildren`) triggered with `set` requests for children. [#28190](https://github.com/ClickHouse/ClickHouse/pull/28190) ([alesapin](https://github.com/alesapin)). +* Backported in [#28343](https://github.com/ClickHouse/ClickHouse/issues/28343): Fix a rare bug in `clickhouse-keeper` when the client can receive a watch response before request-response. [#28197](https://github.com/ClickHouse/ClickHouse/pull/28197) ([alesapin](https://github.com/alesapin)). +* Backported in [#28261](https://github.com/ClickHouse/ClickHouse/issues/28261): Fix possible read of uninitialized memory for queries with `Nullable(LowCardinality)` type and extremes. Fixes [#28165](https://github.com/ClickHouse/ClickHouse/issues/28165). [#28205](https://github.com/ClickHouse/ClickHouse/pull/28205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#28253](https://github.com/ClickHouse/ClickHouse/issues/28253): Fix reading of custom TLD w/o new line at EOF. [#28213](https://github.com/ClickHouse/ClickHouse/pull/28213) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#28294](https://github.com/ClickHouse/ClickHouse/issues/28294): Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28401](https://github.com/ClickHouse/ClickHouse/issues/28401): Fix intersecting parts due to new part had been replaced with an empty part. [#28310](https://github.com/ClickHouse/ClickHouse/pull/28310) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#28688](https://github.com/ClickHouse/ClickHouse/issues/28688): Fix NOT-IN index optimization when not all key columns are used. This fixes [#28120](https://github.com/ClickHouse/ClickHouse/issues/28120). [#28315](https://github.com/ClickHouse/ClickHouse/pull/28315) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28435](https://github.com/ClickHouse/ClickHouse/issues/28435): Fix non joined rows from nullable column. Close [#27691](https://github.com/ClickHouse/ClickHouse/issues/27691). [#28349](https://github.com/ClickHouse/ClickHouse/pull/28349) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#28645](https://github.com/ClickHouse/ClickHouse/issues/28645): Fix rare case when changes of `clickhouse-keeper` settings may lead to lost logs and server hung. [#28360](https://github.com/ClickHouse/ClickHouse/pull/28360) ([alesapin](https://github.com/alesapin)). +* Backported in [#28507](https://github.com/ClickHouse/ClickHouse/issues/28507): Fix lack of quotes for table names in MaterializedPostgreSQL engine. Closes [#28316](https://github.com/ClickHouse/ClickHouse/issues/28316). [#28433](https://github.com/ClickHouse/ClickHouse/pull/28433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#28480](https://github.com/ClickHouse/ClickHouse/issues/28480): Fixed possible ZooKeeper watches leak on background processing of distributed DDL queue. 
Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28572](https://github.com/ClickHouse/ClickHouse/issues/28572): Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). +* Backported in [#28594](https://github.com/ClickHouse/ClickHouse/issues/28594): Fix `There is no subcolumn` error, while select from tables, which have `Nested` columns and scalar columns with dot in name and the same prefix as `Nested` (e.g. `n.id UInt32, n.arr1 Array(UInt64), n.arr2 Array(UInt64)`). [#28531](https://github.com/ClickHouse/ClickHouse/pull/28531) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28657](https://github.com/ClickHouse/ClickHouse/issues/28657): Fix UUID overlap in DROP TABLE for internal DDL from MaterializeMySQL. [#28533](https://github.com/ClickHouse/ClickHouse/pull/28533) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#28568](https://github.com/ClickHouse/ClickHouse/issues/28568): Fix endless loop for truncated bzip2 archive. [#28543](https://github.com/ClickHouse/ClickHouse/pull/28543) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#28704](https://github.com/ClickHouse/ClickHouse/issues/28704): - Fix the number of arguments required by s2RectAdd and s2RectContains functions. [#28663](https://github.com/ClickHouse/ClickHouse/pull/28663) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#28715](https://github.com/ClickHouse/ClickHouse/issues/28715): Add Settings.Names, Settings.Values aliases for system.processes table. 
[#28685](https://github.com/ClickHouse/ClickHouse/pull/28685) ([Vitaly Orlov](https://github.com/orloffv)). +* Backported in [#28744](https://github.com/ClickHouse/ClickHouse/issues/28744): Fix the coredump in the creation of distributed tables, when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)). + diff --git a/docs/changelogs/v21.9.3.30-stable.md b/docs/changelogs/v21.9.3.30-stable.md new file mode 100644 index 00000000000..3b665365668 --- /dev/null +++ b/docs/changelogs/v21.9.3.30-stable.md @@ -0,0 +1,16 @@ +### ClickHouse release v21.9.3.30-stable FIXME as compared to v21.9.2.17-stable + +#### Improvement +* Backported in [#28897](https://github.com/ClickHouse/ClickHouse/issues/28897): Use real tmp file instead of predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#28815](https://github.com/ClickHouse/ClickHouse/issues/28815): Fix possible crash for `SELECT` with partially created aggregate projection in case of exception. [#28700](https://github.com/ClickHouse/ClickHouse/pull/28700) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#28789](https://github.com/ClickHouse/ClickHouse/issues/28789): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). +* Backported in [#28842](https://github.com/ClickHouse/ClickHouse/issues/28842): Fix expressions compilation with short circuit evaluation. [#28821](https://github.com/ClickHouse/ClickHouse/pull/28821) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#28993](https://github.com/ClickHouse/ClickHouse/issues/28993): Fixed a race condition between `DROP PART` and `REPLACE/MOVE PARTITION` that might cause replicas to diverge in rare cases. [#28864](https://github.com/ClickHouse/ClickHouse/pull/28864) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#28949](https://github.com/ClickHouse/ClickHouse/issues/28949): Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#28925](https://github.com/ClickHouse/ClickHouse/issues/28925): Fix bug with LowCardinality in short-circuit function evaluation. Closes [#28884](https://github.com/ClickHouse/ClickHouse/issues/28884). [#28887](https://github.com/ClickHouse/ClickHouse/pull/28887) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#28927](https://github.com/ClickHouse/ClickHouse/issues/28927): Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29025](https://github.com/ClickHouse/ClickHouse/issues/29025): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in a single thread since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ diff --git a/docs/changelogs/v21.9.4.35-stable.md b/docs/changelogs/v21.9.4.35-stable.md new file mode 100644 index 00000000000..8b919ecb268 --- /dev/null +++ b/docs/changelogs/v21.9.4.35-stable.md @@ -0,0 +1,6 @@ +### ClickHouse release v21.9.4.35-stable FIXME as compared to v21.9.3.30-stable + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29191](https://github.com/ClickHouse/ClickHouse/issues/29191): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v21.9.5.16-stable.md b/docs/changelogs/v21.9.5.16-stable.md new file mode 100644 index 00000000000..7287c58064d --- /dev/null +++ b/docs/changelogs/v21.9.5.16-stable.md @@ -0,0 +1,48 @@ +### ClickHouse release v21.9.5.16-stable FIXME as compared to v21.9.4.35-stable + +#### Improvement +* Backported in [#29897](https://github.com/ClickHouse/ClickHouse/issues/29897): Added partitioned table prefix 'p' for the query for fetching replica identity index. [#29828](https://github.com/ClickHouse/ClickHouse/pull/29828) ([Shoh Jahon](https://github.com/Shohjahon)). +* Backported in [#29943](https://github.com/ClickHouse/ClickHouse/issues/29943): Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#29775](https://github.com/ClickHouse/ClickHouse/issues/29775): Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns=0`:. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#29126](https://github.com/ClickHouse/ClickHouse/issues/29126): Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization. [#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)). +* Backported in [#29972](https://github.com/ClickHouse/ClickHouse/issues/29972): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#30052](https://github.com/ClickHouse/ClickHouse/issues/30052): Fix releasing query ID and session ID at the end of query processing while handing gRPC call. This PR fixes flaky test [test_grpc_protocol/test.py::test_session](https://clickhouse-test-reports.s3.yandex.net/0/1ac03811a2df9717fa7c633d1af03def821d24b6/integration_tests_(memory).html). [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#29055](https://github.com/ClickHouse/ClickHouse/issues/29055): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#29107](https://github.com/ClickHouse/ClickHouse/issues/29107): Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#29815](https://github.com/ClickHouse/ClickHouse/issues/29815): Do not allow to reuse previous credentials in case of inter-server secret (Before INSERT via Buffer/Kafka to Distributed table with interserver secret configured for that cluster, may re-use previously set user for that connection). [#29060](https://github.com/ClickHouse/ClickHouse/pull/29060) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29399](https://github.com/ClickHouse/ClickHouse/issues/29399): Send normal `Database doesn't exist error` (`UNKNOWN_DATABASE`) to the client (via TCP) instead of `Attempt to read after eof` (`ATTEMPT_TO_READ_AFTER_EOF`). [#29229](https://github.com/ClickHouse/ClickHouse/pull/29229) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29356](https://github.com/ClickHouse/ClickHouse/issues/29356): Fix possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of table columns have default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#29451](https://github.com/ClickHouse/ClickHouse/issues/29451): Fix failed assertion in ReadBufferFromHDFS. Update libhdfs3 library to be able to run in tests in debug. Closes [#29251](https://github.com/ClickHouse/ClickHouse/issues/29251). Closes [#27814](https://github.com/ClickHouse/ClickHouse/issues/27814). [#29276](https://github.com/ClickHouse/ClickHouse/pull/29276) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29301](https://github.com/ClickHouse/ClickHouse/issues/29301): Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#29383](https://github.com/ClickHouse/ClickHouse/issues/29383): Remove window function `nth_value` as it is not memory-safe. This closes [#29347](https://github.com/ClickHouse/ClickHouse/issues/29347). [#29348](https://github.com/ClickHouse/ClickHouse/pull/29348) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#29438](https://github.com/ClickHouse/ClickHouse/issues/29438): Fix replicated access storage not shutting down cleanly when misconfigured. [#29388](https://github.com/ClickHouse/ClickHouse/pull/29388) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#29489](https://github.com/ClickHouse/ClickHouse/issues/29489): Fix Logical error `Cannot capture columns` in functions greatest/least. Closes [#29334](https://github.com/ClickHouse/ClickHouse/issues/29334). [#29454](https://github.com/ClickHouse/ClickHouse/pull/29454) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#29536](https://github.com/ClickHouse/ClickHouse/issues/29536): Fix possible `Block structure mismatch` for subqueries with pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29592](https://github.com/ClickHouse/ClickHouse/issues/29592): In ODBC bridge add retries for error Invalid cursor state. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29572](https://github.com/ClickHouse/ClickHouse/issues/29572): Fix bug in check `pathStartsWith` because there was a bug with the usage of `std::mismatch`: ` The behavior is undefined if the second range is shorter than the first range.`. 
[#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#29630](https://github.com/ClickHouse/ClickHouse/issues/29630): Fix rare segfault in `ALTER MODIFY` query when using incorrect table identifier in `DEFAULT` expression like `x.y.z...` Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)). +* Backported in [#29658](https://github.com/ClickHouse/ClickHouse/issues/29658): Fix JIT expression compilation with aliases and short-circuit expression evaluation. Closes [#29403](https://github.com/ClickHouse/ClickHouse/issues/29403). [#29574](https://github.com/ClickHouse/ClickHouse/pull/29574) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#29751](https://github.com/ClickHouse/ClickHouse/issues/29751): Condition in filter predicate could be lost after push-down optimisation. [#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#29849](https://github.com/ClickHouse/ClickHouse/issues/29849): Fix concurrent access to `LowCardinality` during `GROUP BY` (leads to SIGSEGV). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#29909](https://github.com/ClickHouse/ClickHouse/issues/29909): Fix bad cast in `ATTACH TABLE ... FROM 'path'` query when non-string literal is used instead of path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#29865](https://github.com/ClickHouse/ClickHouse/issues/29865): Avoid `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in kernel. 
Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). [#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30024](https://github.com/ClickHouse/ClickHouse/issues/30024): MaterializedMySQL: Fix an issue where if the connection to MySQL was lost, only parts of a transaction could be processed. [#29837](https://github.com/ClickHouse/ClickHouse/pull/29837) ([Håvard Kvålen](https://github.com/havardk)). +* Backported in [#29878](https://github.com/ClickHouse/ClickHouse/issues/29878): Fix system tables recreation check (fails to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30057](https://github.com/ClickHouse/ClickHouse/issues/30057): Fix potential resource leak of the concurrent query limit of merge tree tables introduced in https://github.com/ClickHouse/ClickHouse/pull/19544 . [#29879](https://github.com/ClickHouse/ClickHouse/pull/29879) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#30204](https://github.com/ClickHouse/ClickHouse/issues/30204): Fix data-race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30209](https://github.com/ClickHouse/ClickHouse/issues/30209): Fix possible data-race between `FileChecker` and `StorageLog`/`StorageStripeLog`. [#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30070](https://github.com/ClickHouse/ClickHouse/issues/30070): Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)). 
+* Backported in [#30128](https://github.com/ClickHouse/ClickHouse/issues/30128): Dropped `Memory` database might reappear after server restart, it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). Also added `force_remove_data_recursively_on_drop` setting as a workaround for `Directory not empty` error when dropping `Ordinary` database (because it's not possible to remove data leftovers manually in cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30262](https://github.com/ClickHouse/ClickHouse/issues/30262): FlatDictionary, HashedDictionary fix bytes_allocated calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#30306](https://github.com/ClickHouse/ClickHouse/issues/30306): Fix crash with shortcircuit and lowcardinality in multiIf. [#30243](https://github.com/ClickHouse/ClickHouse/pull/30243) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#30290](https://github.com/ClickHouse/ClickHouse/issues/30290): Fix ComplexKeyHashedDictionary, ComplexKeySparseHashedDictionary parsing `preallocate` option from layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NO CL CATEGORY + +* Avoid deadlocks when reading and writing to JOIN Engine tables at the same time. [#30185](https://github.com/ClickHouse/ClickHouse/pull/30185) ([Raúl Marín](https://github.com/Algunenano)). 
+ diff --git a/docs/changelogs/v21.9.6.24-stable.md b/docs/changelogs/v21.9.6.24-stable.md new file mode 100644 index 00000000000..f1d097ab646 --- /dev/null +++ b/docs/changelogs/v21.9.6.24-stable.md @@ -0,0 +1,57 @@ +### ClickHouse release v21.9.6.24-stable FIXME as compared to v21.9.5.16-stable + +#### New Feature +* Backported in [#30714](https://github.com/ClickHouse/ClickHouse/issues/30714): CompiledExpressionCache limit elements size using `compiled_expression_cache_elements_size` setting. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Backported in [#31734](https://github.com/ClickHouse/ClickHouse/issues/31734): Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Backported in [#30470](https://github.com/ClickHouse/ClickHouse/issues/30470): Allow symlinks to files in user_files directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#30664](https://github.com/ClickHouse/ClickHouse/issues/30664): Fix reading from empty file on encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#31371](https://github.com/ClickHouse/ClickHouse/issues/31371): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#31576](https://github.com/ClickHouse/ClickHouse/issues/31576): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). 
[#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release + +* Backported in [#30916](https://github.com/ClickHouse/ClickHouse/issues/30916): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#30823](https://github.com/ClickHouse/ClickHouse/issues/30823): Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30609](https://github.com/ClickHouse/ClickHouse/issues/30609): Fix bad optimizations of ORDER BY if it contains WITH FILL. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049). [#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#30765](https://github.com/ClickHouse/ClickHouse/issues/30765): Fix hanging DDL queries on Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#30505](https://github.com/ClickHouse/ClickHouse/issues/30505): Fixed incorrect behaviour of setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Backported in [#30464](https://github.com/ClickHouse/ClickHouse/issues/30464): Support nullable arguments in function `initializeAggregation`. [#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#30657](https://github.com/ClickHouse/ClickHouse/issues/30657): Fix `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#30524](https://github.com/ClickHouse/ClickHouse/issues/30524): Fixed segfault which might happen if session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30586](https://github.com/ClickHouse/ClickHouse/issues/30586): Fix deadlock on ALTER with scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#30606](https://github.com/ClickHouse/ClickHouse/issues/30606): Limit push down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438). [#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#30752](https://github.com/ClickHouse/ClickHouse/issues/30752): Functions for case-insensitive search in UTF8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually do not match, it's fixed. [#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#30711](https://github.com/ClickHouse/ClickHouse/issues/30711): Fix PREWHERE with WHERE in case of always true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#30769](https://github.com/ClickHouse/ClickHouse/issues/30769): Fixed a race condition between `REPLACE/MOVE PARTITION` and background merge in non-replicated `MergeTree` that might cause a part of moved/replaced data to remain in partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30858](https://github.com/ClickHouse/ClickHouse/issues/30858): Fixed ambiguity when extracting auxiliary ZooKeeper name from ZooKeeper path in `ReplicatedMergeTree`. Previously server might fail to start with `Unknown auxiliary ZooKeeper name` if ZooKeeper path contains a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). Also it was allowed to specify ZooKeeper path that does not start with slash, but now it's deprecated and creation of new tables with such path is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed too. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#30924](https://github.com/ClickHouse/ClickHouse/issues/30924): Fix set index not used in AND/OR expressions when there are more than two operands. This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416) . [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#31290](https://github.com/ClickHouse/ClickHouse/issues/31290): Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). 
[#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31153](https://github.com/ClickHouse/ClickHouse/issues/31153): Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Backported in [#31039](https://github.com/ClickHouse/ClickHouse/issues/31039): Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31131](https://github.com/ClickHouse/ClickHouse/issues/31131): Fix JSONValue/Query with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#31205](https://github.com/ClickHouse/ClickHouse/issues/31205): Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31375](https://github.com/ClickHouse/ClickHouse/issues/31375): Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Backported in [#31254](https://github.com/ClickHouse/ClickHouse/issues/31254): Fix bug in Keeper which can lead to inability to start when some coordination logs were lost and we have a fresher snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Backported in [#31520](https://github.com/ClickHouse/ClickHouse/issues/31520): Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Backported in [#31553](https://github.com/ClickHouse/ClickHouse/issues/31553): Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#31583](https://github.com/ClickHouse/ClickHouse/issues/31583): Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#31601](https://github.com/ClickHouse/ClickHouse/issues/31601): Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Backported in [#31746](https://github.com/ClickHouse/ClickHouse/issues/31746): `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#31793](https://github.com/ClickHouse/ClickHouse/issues/31793): Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31828](https://github.com/ClickHouse/ClickHouse/issues/31828): Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#31817](https://github.com/ClickHouse/ClickHouse/issues/31817): Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#31759](https://github.com/ClickHouse/ClickHouse/issues/31759): Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#31893](https://github.com/ClickHouse/ClickHouse/issues/31893): Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#32030](https://github.com/ClickHouse/ClickHouse/issues/32030): Fix invalid cast of nullable type when nullable primary key is used. This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). 
[#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#32078](https://github.com/ClickHouse/ClickHouse/issues/32078): Fix a bug about function transform with decimal args. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([Shuai li](https://github.com/loneylee)). +* Backported in [#31907](https://github.com/ClickHouse/ClickHouse/issues/31907): Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). + From b3a93601a8b055fdebf5a92d236f94ce3d6ee55d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 25 May 2022 00:01:42 +0200 Subject: [PATCH 499/615] Add changelogs for 2020 where the script works --- docs/changelogs/v20.10.1.4881-prestable.md | 197 +++++++++++ docs/changelogs/v20.10.2.20-stable.md | 180 ++++++++++ docs/changelogs/v20.10.3.30-stable.md | 17 + docs/changelogs/v20.10.4.1-stable.md | 17 + docs/changelogs/v20.10.5.10-stable.md | 18 + docs/changelogs/v20.10.6.27-stable.md | 31 ++ docs/changelogs/v20.10.7.4-stable.md | 13 + docs/changelogs/v20.11.1.5109-prestable.md | 157 +++++++++ docs/changelogs/v20.11.2.1-stable.md | 162 +++++++++ docs/changelogs/v20.11.3.3-stable.md | 5 + docs/changelogs/v20.11.4.13-stable.md | 20 ++ docs/changelogs/v20.11.5.18-stable.md | 33 ++ docs/changelogs/v20.11.6.6-stable.md | 14 + docs/changelogs/v20.11.7.16-stable.md | 62 ++++ docs/changelogs/v20.12.1.5236-prestable.md | 98 ++++++ docs/changelogs/v20.12.2.1-stable.md | 127 +++++++ docs/changelogs/v20.12.3.3-stable.md | 2 + docs/changelogs/v20.12.4.5-stable.md | 14 + docs/changelogs/v20.12.5.14-stable.md | 14 + docs/changelogs/v20.12.5.18-stable.md | 57 ++++ docs/changelogs/v20.12.6.29-stable.md | 18 + docs/changelogs/v20.12.7.3-stable.md | 9 + docs/changelogs/v20.12.8.5-stable.md | 9 + 
docs/changelogs/v20.5.1.3833-prestable.md | 380 +++++++++++++++++++++ docs/changelogs/v20.5.2.7-stable.md | 33 ++ docs/changelogs/v20.5.3.27-stable.md | 60 ++++ docs/changelogs/v20.5.4.40-stable.md | 32 ++ docs/changelogs/v20.5.5.74-stable.md | 34 ++ docs/changelogs/v20.6.1.4066-prestable.md | 184 ++++++++++ docs/changelogs/v20.6.10.2-stable.md | 2 + docs/changelogs/v20.6.11.1-stable.md | 2 + docs/changelogs/v20.6.2.15-prestable.md | 204 +++++++++++ docs/changelogs/v20.6.3.28-stable.md | 17 + docs/changelogs/v20.6.4.44-stable.md | 22 ++ docs/changelogs/v20.6.5.8-stable.md | 33 ++ docs/changelogs/v20.6.6.7-stable.md | 13 + docs/changelogs/v20.6.7.4-stable.md | 9 + docs/changelogs/v20.6.8.5-stable.md | 32 ++ docs/changelogs/v20.6.9.1-stable.md | 2 + docs/changelogs/v20.7.1.4310-prestable.md | 170 +++++++++ docs/changelogs/v20.7.2.30-stable.md | 41 +++ docs/changelogs/v20.7.3.7-stable.md | 25 ++ docs/changelogs/v20.7.4.11-stable.md | 36 ++ docs/changelogs/v20.8.1.4513-prestable.md | 133 ++++++++ docs/changelogs/v20.8.10.13-lts.md | 6 + docs/changelogs/v20.8.11.17-lts.md | 5 + docs/changelogs/v20.8.12.2-lts.md | 5 + docs/changelogs/v20.8.13.15-lts.md | 42 +++ docs/changelogs/v20.8.14.4-lts.md | 19 ++ docs/changelogs/v20.8.15.11-lts.md | 17 + docs/changelogs/v20.8.16.20-lts.md | 6 + docs/changelogs/v20.8.17.25-lts.md | 10 + docs/changelogs/v20.8.18.32-lts.md | 8 + docs/changelogs/v20.8.19.4-stable.md | 29 ++ docs/changelogs/v20.8.2.3-stable.md | 143 ++++++++ docs/changelogs/v20.8.3.18-stable.md | 16 + docs/changelogs/v20.8.4.11-lts.md | 38 +++ docs/changelogs/v20.8.5.45-lts.md | 37 ++ docs/changelogs/v20.8.6.6-lts.md | 12 + docs/changelogs/v20.8.7.15-lts.md | 19 ++ docs/changelogs/v20.8.8.2-lts.md | 10 + docs/changelogs/v20.8.9.6-lts.md | 2 + docs/changelogs/v20.9.1.4585-prestable.md | 63 ++++ docs/changelogs/v20.9.2.20-stable.md | 75 ++++ docs/changelogs/v20.9.3.45-stable.md | 33 ++ docs/changelogs/v20.9.4.76-stable.md | 37 ++ docs/changelogs/v20.9.5.5-stable.md | 12 + 
docs/changelogs/v20.9.6.14-stable.md | 19 ++ docs/changelogs/v20.9.7.11-stable.md | 27 ++ 69 files changed, 3428 insertions(+) create mode 100644 docs/changelogs/v20.10.1.4881-prestable.md create mode 100644 docs/changelogs/v20.10.2.20-stable.md create mode 100644 docs/changelogs/v20.10.3.30-stable.md create mode 100644 docs/changelogs/v20.10.4.1-stable.md create mode 100644 docs/changelogs/v20.10.5.10-stable.md create mode 100644 docs/changelogs/v20.10.6.27-stable.md create mode 100644 docs/changelogs/v20.10.7.4-stable.md create mode 100644 docs/changelogs/v20.11.1.5109-prestable.md create mode 100644 docs/changelogs/v20.11.2.1-stable.md create mode 100644 docs/changelogs/v20.11.3.3-stable.md create mode 100644 docs/changelogs/v20.11.4.13-stable.md create mode 100644 docs/changelogs/v20.11.5.18-stable.md create mode 100644 docs/changelogs/v20.11.6.6-stable.md create mode 100644 docs/changelogs/v20.11.7.16-stable.md create mode 100644 docs/changelogs/v20.12.1.5236-prestable.md create mode 100644 docs/changelogs/v20.12.2.1-stable.md create mode 100644 docs/changelogs/v20.12.3.3-stable.md create mode 100644 docs/changelogs/v20.12.4.5-stable.md create mode 100644 docs/changelogs/v20.12.5.14-stable.md create mode 100644 docs/changelogs/v20.12.5.18-stable.md create mode 100644 docs/changelogs/v20.12.6.29-stable.md create mode 100644 docs/changelogs/v20.12.7.3-stable.md create mode 100644 docs/changelogs/v20.12.8.5-stable.md create mode 100644 docs/changelogs/v20.5.1.3833-prestable.md create mode 100644 docs/changelogs/v20.5.2.7-stable.md create mode 100644 docs/changelogs/v20.5.3.27-stable.md create mode 100644 docs/changelogs/v20.5.4.40-stable.md create mode 100644 docs/changelogs/v20.5.5.74-stable.md create mode 100644 docs/changelogs/v20.6.1.4066-prestable.md create mode 100644 docs/changelogs/v20.6.10.2-stable.md create mode 100644 docs/changelogs/v20.6.11.1-stable.md create mode 100644 docs/changelogs/v20.6.2.15-prestable.md create mode 100644 
docs/changelogs/v20.6.3.28-stable.md create mode 100644 docs/changelogs/v20.6.4.44-stable.md create mode 100644 docs/changelogs/v20.6.5.8-stable.md create mode 100644 docs/changelogs/v20.6.6.7-stable.md create mode 100644 docs/changelogs/v20.6.7.4-stable.md create mode 100644 docs/changelogs/v20.6.8.5-stable.md create mode 100644 docs/changelogs/v20.6.9.1-stable.md create mode 100644 docs/changelogs/v20.7.1.4310-prestable.md create mode 100644 docs/changelogs/v20.7.2.30-stable.md create mode 100644 docs/changelogs/v20.7.3.7-stable.md create mode 100644 docs/changelogs/v20.7.4.11-stable.md create mode 100644 docs/changelogs/v20.8.1.4513-prestable.md create mode 100644 docs/changelogs/v20.8.10.13-lts.md create mode 100644 docs/changelogs/v20.8.11.17-lts.md create mode 100644 docs/changelogs/v20.8.12.2-lts.md create mode 100644 docs/changelogs/v20.8.13.15-lts.md create mode 100644 docs/changelogs/v20.8.14.4-lts.md create mode 100644 docs/changelogs/v20.8.15.11-lts.md create mode 100644 docs/changelogs/v20.8.16.20-lts.md create mode 100644 docs/changelogs/v20.8.17.25-lts.md create mode 100644 docs/changelogs/v20.8.18.32-lts.md create mode 100644 docs/changelogs/v20.8.19.4-stable.md create mode 100644 docs/changelogs/v20.8.2.3-stable.md create mode 100644 docs/changelogs/v20.8.3.18-stable.md create mode 100644 docs/changelogs/v20.8.4.11-lts.md create mode 100644 docs/changelogs/v20.8.5.45-lts.md create mode 100644 docs/changelogs/v20.8.6.6-lts.md create mode 100644 docs/changelogs/v20.8.7.15-lts.md create mode 100644 docs/changelogs/v20.8.8.2-lts.md create mode 100644 docs/changelogs/v20.8.9.6-lts.md create mode 100644 docs/changelogs/v20.9.1.4585-prestable.md create mode 100644 docs/changelogs/v20.9.2.20-stable.md create mode 100644 docs/changelogs/v20.9.3.45-stable.md create mode 100644 docs/changelogs/v20.9.4.76-stable.md create mode 100644 docs/changelogs/v20.9.5.5-stable.md create mode 100644 docs/changelogs/v20.9.6.14-stable.md create mode 100644 
docs/changelogs/v20.9.7.11-stable.md diff --git a/docs/changelogs/v20.10.1.4881-prestable.md b/docs/changelogs/v20.10.1.4881-prestable.md new file mode 100644 index 00000000000..a40830ddb43 --- /dev/null +++ b/docs/changelogs/v20.10.1.4881-prestable.md @@ -0,0 +1,197 @@ +### ClickHouse release v20.10.1.4881-prestable FIXME as compared to v20.9.1.4585-prestable + +#### Backward Incompatible Change +* Add support for nested multiline comments `/* comment /* comment */ */` in SQL. This conforms to the SQL standard. [#14655](https://github.com/ClickHouse/ClickHouse/pull/14655) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change default value of `format_regexp_escaping_rule` setting (it's related to `Regexp` format) to `Raw` (it means - read whole subpattern as a value) to make the behaviour more like to what users expect. [#15426](https://github.com/ClickHouse/ClickHouse/pull/15426) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `multiple_joins_rewriter_version` obsolete. Remove first version of joins rewriter. [#15472](https://github.com/ClickHouse/ClickHouse/pull/15472) ([Artem Zuikov](https://github.com/4ertus2)). + +#### New Feature +* Supporting MySQL types: `decimal` (as ClickHouse `Decimal`) and `datetime` with sub-second precision (as `DateTime64`). ... [#11512](https://github.com/ClickHouse/ClickHouse/pull/11512) ([Vasily Nemkov](https://github.com/Enmk)). +* Allow to turn on fsync on inserts, merges and fetches. [#11948](https://github.com/ClickHouse/ClickHouse/pull/11948) ([Anton Popov](https://github.com/CurtizJ)). +* Secure inter-cluster query execution (with initial_user as current query user). [#13156](https://github.com/ClickHouse/ClickHouse/pull/13156) ([Azat Khuzhin](https://github.com/azat)). +* * Add `mapPopulateSeries` function. [#13166](https://github.com/ClickHouse/ClickHouse/pull/13166) ([Ildus Kurbangaliev](https://github.com/ildus)). 
+* Allow user to specify settings for `ReplicatedMergeTree*` storage in `<replicated_merge_tree>` section of config file. It works similarly to `<merge_tree>` section. For `ReplicatedMergeTree*` storages settings from `<merge_tree>` and `<replicated_merge_tree>` are applied together, but settings from `<replicated_merge_tree>` have higher priority. Added `system.replicated_merge_tree_settings` table. [#13573](https://github.com/ClickHouse/ClickHouse/pull/13573) ([Amos Bird](https://github.com/amosbird)). +* Add new feature: format LineAsString that accepts a sequence of lines separated by newlines, spaces and/or commas. [#13846](https://github.com/ClickHouse/ClickHouse/pull/13846) ([hexiaoting](https://github.com/hexiaoting)). +* New query complexity limit settings `max_rows_to_read_leaf`, `max_bytes_to_read_leaf` for distributed queries to limit max rows/bytes read on the leaf nodes. Limit is applied for local reads only, **excluding** the final merge stage on the root node. [#14221](https://github.com/ClickHouse/ClickHouse/pull/14221) ([Roman Khavronenko](https://github.com/hagen1778)). +* Add JSONStrings formats which output data in arrays of strings. [#14333](https://github.com/ClickHouse/ClickHouse/pull/14333) ([hcz](https://github.com/hczhcz)). +* Now insert statements can have asterisk (or variants) with column transformers in the column list. [#14453](https://github.com/ClickHouse/ClickHouse/pull/14453) ([Amos Bird](https://github.com/amosbird)). +* Added a script to import git repository to ClickHouse. [#14471](https://github.com/ClickHouse/ClickHouse/pull/14471) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the ability to specify `TTL ... RECOMPRESS codec_name` for MergeTree table engines family. [#14494](https://github.com/ClickHouse/ClickHouse/pull/14494) ([alesapin](https://github.com/alesapin)). +* Add event_time_microseconds to `system.asynchronous_metric_log` & `system.metric_log` tables. [#14514](https://github.com/ClickHouse/ClickHouse/pull/14514) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add new feature: SHOW DATABASES LIKE 'xxx'. [#14521](https://github.com/ClickHouse/ClickHouse/pull/14521) ([hexiaoting](https://github.com/hexiaoting)). +* Support decimal data type for MaterializedMySQL. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)). +* Allow configurable NULL representation for TSV output format. It is controlled by the setting `output_format_tsv_null_representation` which is `\N` by default. This closes [#9375](https://github.com/ClickHouse/ClickHouse/issues/9375). Note that the setting only controls output format and `\N` is the only supported NULL representation for TSV input format. [#14586](https://github.com/ClickHouse/ClickHouse/pull/14586) ([Kruglov Pavel](https://github.com/Avogar)). +* Add new feature: format LineAsString that accepts a sequence of line separated by newlines, spaces and/or commas. [#14703](https://github.com/ClickHouse/ClickHouse/pull/14703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added `formatReadableQuantity` function. It is useful for reading big numbers by human. [#14725](https://github.com/ClickHouse/ClickHouse/pull/14725) ([Artem Hnilov](https://github.com/BooBSD)). +* Add the ability to remove column properties and table TTLs. Introduced queries `ALTER TABLE MODIFY COLUMN col_name REMOVE what_to_remove` and `ALTER TABLE REMOVE TTL`. Both operations are lightweight and executed at the metadata level. [#14742](https://github.com/ClickHouse/ClickHouse/pull/14742) ([alesapin](https://github.com/alesapin)). +* Introduce event_time_microseconds field to `system.text_log`, `system.trace_log`, `system.query_log` and `system.query_thread_log` tables. [#14760](https://github.com/ClickHouse/ClickHouse/pull/14760) ([Bharat Nallan](https://github.com/bharatnc)). +* Now we support `WITH AS (subquery) ... ` to introduce named subqueries in the query context. This closes [#2416](https://github.com/ClickHouse/ClickHouse/issues/2416). 
This closes [#4967](https://github.com/ClickHouse/ClickHouse/issues/4967). [#14771](https://github.com/ClickHouse/ClickHouse/pull/14771) ([Amos Bird](https://github.com/amosbird)). +* Allow to omit arguments for Replicated table engine if defaults are specified in config. [#14791](https://github.com/ClickHouse/ClickHouse/pull/14791) ([vxider](https://github.com/Vxider)). +* Add table function `null('structure')`. [#14797](https://github.com/ClickHouse/ClickHouse/pull/14797) ([vxider](https://github.com/Vxider)). +* Added query obfuscation tool. It allows to share more queries for better testing. This closes [#15268](https://github.com/ClickHouse/ClickHouse/issues/15268). [#15321](https://github.com/ClickHouse/ClickHouse/pull/15321) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added format `RawBLOB`. It is intended for input or output a single value without any escaping and delimiters. This closes [#15349](https://github.com/ClickHouse/ClickHouse/issues/15349). [#15364](https://github.com/ClickHouse/ClickHouse/pull/15364) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix [15350](https://github.com/ClickHouse/ClickHouse/issues/15350). [#15443](https://github.com/ClickHouse/ClickHouse/pull/15443) ([flynn](https://github.com/ucasfl)). +* Introduce `enable_global_with_statement` setting which propagates the first select's `WITH` statements to other select queries at the same level, and makes aliases in `WITH` statements visible to subqueries. [#15451](https://github.com/ClickHouse/ClickHouse/pull/15451) ([Amos Bird](https://github.com/amosbird)). +* Add the `reinterpretAsUUID` function that allows to convert a big-endian byte string to UUID. [#15480](https://github.com/ClickHouse/ClickHouse/pull/15480) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add parallel quorum inserts. This closes [#15601](https://github.com/ClickHouse/ClickHouse/issues/15601). 
[#15601](https://github.com/ClickHouse/ClickHouse/pull/15601) ([Latysheva Alexandra](https://github.com/alexelex)). + +#### Performance Improvement +* Enable compact parts by default for small parts. This will allow to process frequent inserts slightly more efficiently (4..100 times). [#11913](https://github.com/ClickHouse/ClickHouse/pull/11913) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of 256-bit bytes using (u)int64_t as base type for wide integers. Original wide integers use 8-bit types as base. [#14859](https://github.com/ClickHouse/ClickHouse/pull/14859) ([Artem Zuikov](https://github.com/4ertus2)). +* Only `mlock` code segment when starting clickhouse-server. In previous versions, all mapped regions were locked in memory, including debug info. Debug info is usually split to a separate file but if it isn't, it led to +2..3 GiB memory usage. [#14929](https://github.com/ClickHouse/ClickHouse/pull/14929) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We used to choose fixed key method to group by one fixed string. It's unnecessary since we have StringHashTable which does a similar packedFix optimization for FixedString columns. And we should use low_cardinality_key_fixed_string if possible. [#15034](https://github.com/ClickHouse/ClickHouse/pull/15034) ([Amos Bird](https://github.com/amosbird)). +* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes [#15153](https://github.com/ClickHouse/ClickHouse/issues/15153). [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)). +* Use one S3 DeleteObjects request instead of multiple DeleteObject in cycle. No functional changes, so covered by existing tests like integration/test_log_family_s3. [#15238](https://github.com/ClickHouse/ClickHouse/pull/15238) ([ianton-ru](https://github.com/ianton-ru)). +* Faster 256-bit multiplication.
[#15418](https://github.com/ClickHouse/ClickHouse/pull/15418) ([Artem Zuikov](https://github.com/4ertus2)). +* Improve `quantileTDigest` performance. This fixes [#2668](https://github.com/ClickHouse/ClickHouse/issues/2668). [#15542](https://github.com/ClickHouse/ClickHouse/pull/15542) ([Kruglov Pavel](https://github.com/Avogar)). +* Explicitly use a temporary disk to store vertical merge temporary data. [#15639](https://github.com/ClickHouse/ClickHouse/pull/15639) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). + +#### Improvement +* When duplicate block is written to replica where it does not exist locally (has not been fetched from replicas), don't ignore it and write locally to achieve the same effect as if it was successfully replicated. [#11684](https://github.com/ClickHouse/ClickHouse/pull/11684) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)). +* Now joinGet supports multi-key lookup. Continuation of [#12418](https://github.com/ClickHouse/ClickHouse/issues/12418). [#13015](https://github.com/ClickHouse/ClickHouse/pull/13015) ([Amos Bird](https://github.com/amosbird)). +* For INSERTs with inline data in VALUES format, support semicolon as the data terminator, in addition to the new line. Closes [#12288](https://github.com/ClickHouse/ClickHouse/issues/12288). [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* SYSTEM RELOAD CONFIG now throws an exception if failed to reload and continues using the previous users.xml. The background periodic reloading also continues using the previous users.xml if failed to reload. [#14492](https://github.com/ClickHouse/ClickHouse/pull/14492) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add an option to skip access checks for DiskS3. 
[#14497](https://github.com/ClickHouse/ClickHouse/pull/14497) ([Pavel Kovalenko](https://github.com/Jokser)). +* ClickHouse treats partition expr and key expr differently. Partition expr is used to construct a minmax index containing related columns, while primary key expr is stored as an expr. Sometimes user might partition a table at coarser levels, such as `partition by i / 1000`. However, binary operators are not monotonic and this PR tries to fix that. It might also benefit other use cases. [#14513](https://github.com/ClickHouse/ClickHouse/pull/14513) ([Amos Bird](https://github.com/amosbird)). +* Fix some trailing whitespaces in query format. [#14595](https://github.com/ClickHouse/ClickHouse/pull/14595) ([Azat Khuzhin](https://github.com/azat)). +* Add `QueryMemoryLimitExceeded` event. This closes [#14589](https://github.com/ClickHouse/ClickHouse/issues/14589). [#14647](https://github.com/ClickHouse/ClickHouse/pull/14647) ([fastio](https://github.com/fastio)). +* Fixed the backward-incompatible change by providing the options to build without debug info for functions. [#14657](https://github.com/ClickHouse/ClickHouse/pull/14657) ([Mike Kot](https://github.com/myrrc)). +* Dynamic reload of ZooKeeper config. [#14678](https://github.com/ClickHouse/ClickHouse/pull/14678) ([sundyli](https://github.com/sundy-li)). +* Allow parallel execution of distributed DDL. [#14684](https://github.com/ClickHouse/ClickHouse/pull/14684) ([Azat Khuzhin](https://github.com/azat)). +* Fix potential memory leak caused by zookeeper exists watch. [#14693](https://github.com/ClickHouse/ClickHouse/pull/14693) ([hustnn](https://github.com/hustnn)). +* Fixed "Packet payload is not fully read" error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)). +* Fix crash in `bitShiftLeft()` when called with negative big integer.
[#14697](https://github.com/ClickHouse/ClickHouse/pull/14697) ([Artem Zuikov](https://github.com/4ertus2)). +* Add `merge_algorithm` to system.merges table to improve merging inspections. [#14705](https://github.com/ClickHouse/ClickHouse/pull/14705) ([Amos Bird](https://github.com/amosbird)). +* Less unneeded code generated by DecimalBinaryOperation template in FunctionBinaryArithmetic. [#14743](https://github.com/ClickHouse/ClickHouse/pull/14743) ([Artem Zuikov](https://github.com/4ertus2)). +* Now columns can be used to wrap over a list of columns and apply column transformers afterwards. [#14775](https://github.com/ClickHouse/ClickHouse/pull/14775) ([Amos Bird](https://github.com/amosbird)). +* Support for disabling persistency for StorageJoin and StorageSet, this feature is controlled by setting `disable_set_and_join_persistency`. And this PR solved issue [#6318](https://github.com/ClickHouse/ClickHouse/issues/6318). [#14776](https://github.com/ClickHouse/ClickHouse/pull/14776) ([vxider](https://github.com/Vxider)). +* Construct `query_start_time` and `query_start_time_microseconds` from the same timespec. [#14831](https://github.com/ClickHouse/ClickHouse/pull/14831) ([Bharat Nallan](https://github.com/bharatnc)). +* Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). +* Show subqueries for `SET` and `JOIN` in `EXPLAIN` result. [#14856](https://github.com/ClickHouse/ClickHouse/pull/14856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Provide a `load_balancing_first_offset` query setting to explicitly state what the first replica is. It's used together with `FIRST_OR_RANDOM` load balancing strategy, which allows to control replicas workload. [#14867](https://github.com/ClickHouse/ClickHouse/pull/14867) ([Amos Bird](https://github.com/amosbird)).
+* Fixed excessive settings constraint violation when running SELECT with SETTINGS from a distributed table. [#14876](https://github.com/ClickHouse/ClickHouse/pull/14876) ([Amos Bird](https://github.com/amosbird)). +* Allow to drop Replicated table if previous drop attempt was failed due to ZooKeeper session expiration. This fixes [#11891](https://github.com/ClickHouse/ClickHouse/issues/11891). [#14926](https://github.com/ClickHouse/ClickHouse/pull/14926) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid deadlock when executing INSERT SELECT into itself from a table with `TinyLog` or `Log` table engines. This closes [#6802](https://github.com/ClickHouse/ClickHouse/issues/6802). [#14962](https://github.com/ClickHouse/ClickHouse/pull/14962) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ignore key constraints when doing mutations. Without this pr, it's not possible to do mutations when `force_index_by_date = 1` or `force_primary_key = 1`. [#14973](https://github.com/ClickHouse/ClickHouse/pull/14973) ([Amos Bird](https://github.com/amosbird)). +* Add option to disable TTL move on data part insert. [#15000](https://github.com/ClickHouse/ClickHouse/pull/15000) ([Pavel Kovalenko](https://github.com/Jokser)). +* Enable `Atomic` database engine by default. [#15003](https://github.com/ClickHouse/ClickHouse/pull/15003) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Proper exception message for wrong number of arguments of CAST. This closes [#13992](https://github.com/ClickHouse/ClickHouse/issues/13992). [#15029](https://github.com/ClickHouse/ClickHouse/pull/15029) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the ability to specify specialized codecs like `Delta`, `T64`, etc. for columns with subtypes. 
Implements [#12551](https://github.com/ClickHouse/ClickHouse/issues/12551), fixes [#11397](https://github.com/ClickHouse/ClickHouse/issues/11397), fixes [#4609](https://github.com/ClickHouse/ClickHouse/issues/4609). [#15089](https://github.com/ClickHouse/ClickHouse/pull/15089) ([alesapin](https://github.com/alesapin)). +* Added `optimize` setting to `EXPLAIN PLAN` query. If enabled, query plan level optimisations are applied. Enabled by default. [#15201](https://github.com/ClickHouse/ClickHouse/pull/15201) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not allow connections to ClickHouse server until all scripts in `/docker-entrypoint-initdb.d/` are executed. [#15244](https://github.com/ClickHouse/ClickHouse/pull/15244) ([Aleksei Kozharin](https://github.com/alekseik1)). +* fix [15264](https://github.com/ClickHouse/ClickHouse/issues/15264). [#15285](https://github.com/ClickHouse/ClickHouse/pull/15285) ([flynn](https://github.com/ucasfl)). +* Unfold `{database}`, `{table}` and `{uuid}` macros in `zookeeper_path` on replicated table creation. Do not allow `RENAME TABLE` if it may break `zookeeper_path` after server restart. Fixes [#6917](https://github.com/ClickHouse/ClickHouse/issues/6917). [#15348](https://github.com/ClickHouse/ClickHouse/pull/15348) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add support for "Raw" column format for `Regexp` format. It allows to simply extract subpatterns as a whole without any escaping rules. [#15363](https://github.com/ClickHouse/ClickHouse/pull/15363) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). +* Wait for `DROP/DETACH TABLE` to actually finish if `NO DELAY` or `SYNC` is specified for `Atomic` database. 
[#15448](https://github.com/ClickHouse/ClickHouse/pull/15448) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Pass through *_for_user settings via Distributed with cluster-secure. [#15551](https://github.com/ClickHouse/ClickHouse/pull/15551) ([Azat Khuzhin](https://github.com/azat)). +* Use experimental pass manager by default. [#15608](https://github.com/ClickHouse/ClickHouse/pull/15608) ([Daniel Kutenin](https://github.com/danlark1)). +* Implement force_data_skipping_indices setting. [#15642](https://github.com/ClickHouse/ClickHouse/pull/15642) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Fix `currentDatabase()` function cannot be used in `ON CLUSTER` ddl query. [#14211](https://github.com/ClickHouse/ClickHouse/pull/14211) ([Winter Zhang](https://github.com/zhang2014)). +* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix executable dictionary source hang. In previous versions, when using some formats (e.g. `JSONEachRow`) data was not feed to a child process before it outputs at least something. This closes [#1697](https://github.com/ClickHouse/ClickHouse/issues/1697). This closes [#2455](https://github.com/ClickHouse/ClickHouse/issues/2455). [#14525](https://github.com/ClickHouse/ClickHouse/pull/14525) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a bug when converting Nullable String to Enum. Introduced by https://github.com/ClickHouse/ClickHouse/pull/12745 . This fixes [#14435](https://github.com/ClickHouse/ClickHouse/issues/14435) . [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)). +* Fix rare segfaults in functions with combinator -Resample, which could appear in result of overflow with very large parameters. 
[#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). +* Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Added the checker as neither calling `lc->isNullable()` nor calling `ls->getDictionaryPtr()->isNullable()` would return the correct result. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([Mike Kot](https://github.com/myrrc)). +* Fix wrong Decimal multiplication result caused wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Stuff the query into ASTFunction's argument list so that we don't break the presumptions of some AST visitors. This fixes [#14608](https://github.com/ClickHouse/ClickHouse/issues/14608). [#14611](https://github.com/ClickHouse/ClickHouse/pull/14611) ([Amos Bird](https://github.com/amosbird)). +* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Replace column transformer should replace identifiers with cloned ASTs. This fixes [#14695](https://github.com/ClickHouse/ClickHouse/issues/14695) . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)). 
+* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Fix unreleased bug for LineAsString Format. [#14842](https://github.com/ClickHouse/ClickHouse/pull/14842) ([hexiaoting](https://github.com/hexiaoting)). +* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix rare error in `SELECT` queries when the queried column has `DEFAULT` expression which depends on the other column which also has `DEFAULT` and not present in select query and not exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)). +* Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)). +* Fixed segfault in CacheDictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)). +* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Update jemalloc to fix possible issues with percpu arena. [#14957](https://github.com/ClickHouse/ClickHouse/pull/14957) ([Azat Khuzhin](https://github.com/azat)). +* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)). +* Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. 
[#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([Alexander Tokmakov](https://github.com/tavplubix)). +* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)). +* Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)). +* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)). +* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. 
[#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hang forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)). +* Fixed compression in S3 storage. [#15376](https://github.com/ClickHouse/ClickHouse/pull/15376) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix multiple occurrences of column transformers in a select query. [#15378](https://github.com/ClickHouse/ClickHouse/pull/15378) ([Amos Bird](https://github.com/amosbird)). +* fixes [#15365](https://github.com/ClickHouse/ClickHouse/issues/15365) fix attach mysql database engine throw exception(no query context). [#15384](https://github.com/ClickHouse/ClickHouse/pull/15384) ([Winter Zhang](https://github.com/zhang2014)). +* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). 
+* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)). +* Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 'Database doesn't exist.' in queries with IN and Distributed table when there's no database on initiator. [#15538](https://github.com/ClickHouse/ClickHouse/pull/15538) ([Artem Zuikov](https://github.com/4ertus2)). +* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). 
[#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Proper error handling during insert into MergeTree with S3. [#15657](https://github.com/ClickHouse/ClickHouse/pull/15657) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix race condition in AMQP-CPP. [#15667](https://github.com/ClickHouse/ClickHouse/pull/15667) ([alesapin](https://github.com/alesapin)). +* Fix rare race condition in dictionaries and tables from MySQL. [#15686](https://github.com/ClickHouse/ClickHouse/pull/15686) ([alesapin](https://github.com/alesapin)). +* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error `Cannot find column` which may happen at insertion into `MATERIALIZED VIEW` in case if query for `MV` contains `ARRAY JOIN`. 
[#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement +* Control CI builds configuration from the ClickHouse repository. [#14547](https://github.com/ClickHouse/ClickHouse/pull/14547) ([alesapin](https://github.com/alesapin)). +* Now ClickHouse uses gcc-10 for the release build. Fixes [#11138](https://github.com/ClickHouse/ClickHouse/issues/11138). [#14609](https://github.com/ClickHouse/ClickHouse/pull/14609) ([alesapin](https://github.com/alesapin)). +* Attempt to make performance test more reliable. It is done by remapping the executable memory of the process on the fly with `madvise` to use transparent huge pages - it can lower the number of iTLB misses which is the main source of instabilities in performance tests. [#14685](https://github.com/ClickHouse/ClickHouse/pull/14685) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. In CMake files: - Moved some options' descriptions' parts to comments above. - Replace 0 -> `OFF`, 1 -> `ON` in `option`s default values. - Added some descriptions and links to docs to the options. - Replaced `FUZZER` option (there is another option `ENABLE_FUZZING` which also enables same functionality). - Removed `ENABLE_GTEST_LIBRARY` option as there is `ENABLE_TESTS`. [#14711](https://github.com/ClickHouse/ClickHouse/pull/14711) ([Mike Kot](https://github.com/myrrc)). +* Speed up build a little by removing unused headers. [#14714](https://github.com/ClickHouse/ClickHouse/pull/14714) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build failure in OSX. 
[#14761](https://github.com/ClickHouse/ClickHouse/pull/14761) ([Winter Zhang](https://github.com/zhang2014)). +* Attempt to speed up build a little. [#14808](https://github.com/ClickHouse/ClickHouse/pull/14808) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use clang-11 to build ClickHouse in CI. [#14846](https://github.com/ClickHouse/ClickHouse/pull/14846) ([alesapin](https://github.com/alesapin)). +* #14809 fix MaterializeMySQL empty transaction unstable test case found in CI. [#14854](https://github.com/ClickHouse/ClickHouse/pull/14854) ([Winter Zhang](https://github.com/zhang2014)). +* Reformat and cleanup code in all integration test *.py files. [#14864](https://github.com/ClickHouse/ClickHouse/pull/14864) ([Bharat Nallan](https://github.com/bharatnc)). +* Fixing tests/integration/test_distributed_over_live_view/test.py. [#14892](https://github.com/ClickHouse/ClickHouse/pull/14892) ([vzakaznikov](https://github.com/vzakaznikov)). +* Switch from `clang-tidy-10` to `clang-tidy-11`. [#14922](https://github.com/ClickHouse/ClickHouse/pull/14922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Convert to python3. This closes [#14886](https://github.com/ClickHouse/ClickHouse/issues/14886). [#15007](https://github.com/ClickHouse/ClickHouse/pull/15007) ([Azat Khuzhin](https://github.com/azat)). +* Make performance test more stable and representative by splitting test runs and profile runs. [#15027](https://github.com/ClickHouse/ClickHouse/pull/15027) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix MSan report in base64 (on servers with AVX-512). This fixes [#14006](https://github.com/ClickHouse/ClickHouse/issues/14006). [#15030](https://github.com/ClickHouse/ClickHouse/pull/15030) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't allow any C++ translation unit to build more than 10 minutes or to use more than 10 GB of memory. 
This fixes [#14925](https://github.com/ClickHouse/ClickHouse/issues/14925). [#15060](https://github.com/ClickHouse/ClickHouse/pull/15060) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now all test images use `llvm-symbolizer-11`. [#15069](https://github.com/ClickHouse/ClickHouse/pull/15069) ([alesapin](https://github.com/alesapin)). +* Splitted huge test `test_dictionaries_all_layouts_and_sources` into smaller ones. [#15110](https://github.com/ClickHouse/ClickHouse/pull/15110) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added a script to perform hardware benchmark in a single command. [#15115](https://github.com/ClickHouse/ClickHouse/pull/15115) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix CMake options forwarding in fast test script. Fixes error in [#14711](https://github.com/ClickHouse/ClickHouse/issues/14711). [#15155](https://github.com/ClickHouse/ClickHouse/pull/15155) ([alesapin](https://github.com/alesapin)). +* Improvements in CI docker images: get rid of ZooKeeper and single script for test configs installation. [#15215](https://github.com/ClickHouse/ClickHouse/pull/15215) ([alesapin](https://github.com/alesapin)). +* Now we use clang-11 for production ClickHouse build. [#15239](https://github.com/ClickHouse/ClickHouse/pull/15239) ([alesapin](https://github.com/alesapin)). +* Allow to run AArch64 version of clickhouse-server without configs. This facilitates [#15174](https://github.com/ClickHouse/ClickHouse/issues/15174). [#15266](https://github.com/ClickHouse/ClickHouse/pull/15266) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail early in functional tests if server failed to respond. This closes [#15262](https://github.com/ClickHouse/ClickHouse/issues/15262). [#15267](https://github.com/ClickHouse/ClickHouse/pull/15267) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix bug for build error: [#15272](https://github.com/ClickHouse/ClickHouse/issues/15272). 
[#15297](https://github.com/ClickHouse/ClickHouse/pull/15297) ([hexiaoting](https://github.com/hexiaoting)). +* fix bug for building query_db_generator.cpp. [#15353](https://github.com/ClickHouse/ClickHouse/pull/15353) ([hexiaoting](https://github.com/hexiaoting)). +* Allow to build with llvm-11. [#15366](https://github.com/ClickHouse/ClickHouse/pull/15366) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Switch binary builds(Linux, Darwin, AArch64, FreeBSD) to clang-11. [#15622](https://github.com/ClickHouse/ClickHouse/pull/15622) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix build of one miscellaneous example tool on Mac OS. Note that we don't build examples on Mac OS in our CI (we build only ClickHouse binary), so there is zero chance it will not break again. This fixes [#15804](https://github.com/ClickHouse/ClickHouse/issues/15804). [#15808](https://github.com/ClickHouse/ClickHouse/pull/15808) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Other +* Make binary a bit smaller (~50 Mb for debug version). [#14555](https://github.com/ClickHouse/ClickHouse/pull/14555) ([Artem Zuikov](https://github.com/4ertus2)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump numpy from 1.19.1 to 1.19.2 in /docs/tools'. [#14733](https://github.com/ClickHouse/ClickHouse/pull/14733) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Modify the minimum version of the Clang compiler'. [#14882](https://github.com/ClickHouse/ClickHouse/pull/14882) ([Simon Liu](https://github.com/monadbobo)). +* NO CL ENTRY: 'fix a syntax error bug while using copier'. [#14890](https://github.com/ClickHouse/ClickHouse/pull/14890) ([HyaZz](https://github.com/HyaZz)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.9 to 0.4.13 in /docs/tools'. [#15067](https://github.com/ClickHouse/ClickHouse/pull/15067) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). 
+* NO CL ENTRY: 'Revert "Test and doc for PR12771 krb5 + cyrus-sasl + kerberized kafka"'. [#15232](https://github.com/ClickHouse/ClickHouse/pull/15232) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Avoid deadlocks in Log/TinyLog"'. [#15259](https://github.com/ClickHouse/ClickHouse/pull/15259) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.13 to 0.4.17 in /docs/tools'. [#15460](https://github.com/ClickHouse/ClickHouse/pull/15460) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.10.2.20-stable.md b/docs/changelogs/v20.10.2.20-stable.md new file mode 100644 index 00000000000..3f03721fb16 --- /dev/null +++ b/docs/changelogs/v20.10.2.20-stable.md @@ -0,0 +1,180 @@ +### ClickHouse release v20.10.2.20-stable FIXME as compared to v20.9.1.4585-prestable + +#### Backward Incompatible Change +* Add support for nested multiline comments `/* comment /* comment */ */` in SQL. This conforms to the SQL standard. [#14655](https://github.com/ClickHouse/ClickHouse/pull/14655) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change default value of `format_regexp_escaping_rule` setting (it's related to `Regexp` format) to `Raw` (it means - read whole subpattern as a value) to make the behaviour more like to what users expect. [#15426](https://github.com/ClickHouse/ClickHouse/pull/15426) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `multiple_joins_rewriter_version` obsolete. Remove first version of joins rewriter. [#15472](https://github.com/ClickHouse/ClickHouse/pull/15472) ([Artem Zuikov](https://github.com/4ertus2)). + +#### New Feature +* Add new feature: format LineAsString that accepts a sequence of line separated by newlines, spaces and/or commas. [#14703](https://github.com/ClickHouse/ClickHouse/pull/14703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Added `formatReadableQuantity` function. It is useful for reading big numbers by human. [#14725](https://github.com/ClickHouse/ClickHouse/pull/14725) ([Artem Hnilov](https://github.com/BooBSD)). +* Add the ability to remove column properties and table TTLs. Introduced queries `ALTER TABLE MODIFY COLUMN col_name REMOVE what_to_remove` and `ALTER TABLE REMOVE TTL`. Both operations are lightweight and executed at the metadata level. [#14742](https://github.com/ClickHouse/ClickHouse/pull/14742) ([alesapin](https://github.com/alesapin)). +* Introduce event_time_microseconds field to `system.text_log`, `system.trace_log`, `system.query_log` and `system.query_thread_log` tables. [#14760](https://github.com/ClickHouse/ClickHouse/pull/14760) ([Bharat Nallan](https://github.com/bharatnc)). +* Now we support `WITH AS (subquery) ... ` to introduce named subqueries in the query context. This closes [#2416](https://github.com/ClickHouse/ClickHouse/issues/2416). This closes [#4967](https://github.com/ClickHouse/ClickHouse/issues/4967). [#14771](https://github.com/ClickHouse/ClickHouse/pull/14771) ([Amos Bird](https://github.com/amosbird)). +* Allow to omit arguments for Replicated table engine if defaults are specified in config. [#14791](https://github.com/ClickHouse/ClickHouse/pull/14791) ([vxider](https://github.com/Vxider)). +* Add table function `null('structure')`. [#14797](https://github.com/ClickHouse/ClickHouse/pull/14797) ([vxider](https://github.com/Vxider)). +* Added query obfuscation tool. It allows to share more queries for better testing. This closes [#15268](https://github.com/ClickHouse/ClickHouse/issues/15268). [#15321](https://github.com/ClickHouse/ClickHouse/pull/15321) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added format `RawBLOB`. It is intended for input or output a single value without any escaping and delimiters. This closes [#15349](https://github.com/ClickHouse/ClickHouse/issues/15349). 
[#15364](https://github.com/ClickHouse/ClickHouse/pull/15364) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix [15350](https://github.com/ClickHouse/ClickHouse/issues/15350). [#15443](https://github.com/ClickHouse/ClickHouse/pull/15443) ([flynn](https://github.com/ucasfl)). +* Introduce `enable_global_with_statement` setting which propagates the first select's `WITH` statements to other select queries at the same level, and makes aliases in `WITH` statements visible to subqueries. [#15451](https://github.com/ClickHouse/ClickHouse/pull/15451) ([Amos Bird](https://github.com/amosbird)). +* Add the `reinterpretAsUUID` function that allows to convert a big-endian byte string to UUID. [#15480](https://github.com/ClickHouse/ClickHouse/pull/15480) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add parallel quorum inserts. This closes [#15601](https://github.com/ClickHouse/ClickHouse/issues/15601). [#15601](https://github.com/ClickHouse/ClickHouse/pull/15601) ([Latysheva Alexandra](https://github.com/alexelex)). + +#### Performance Improvement +* Improve performance of 256-bit bytes using (u)int64_t as base type for wide integers. Original wide integers use 8-bit types as base. [#14859](https://github.com/ClickHouse/ClickHouse/pull/14859) ([Artem Zuikov](https://github.com/4ertus2)). +* Only `mlock` code segment when starting clickhouse-server. In previous versions, all mapped regions were locked in memory, including debug info. Debug info is usually splitted to a separate file but if it isn't, it led to +2..3 GiB memory usage. [#14929](https://github.com/ClickHouse/ClickHouse/pull/14929) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We used to choose fixed key method to group by one fixed string. It's unnecessary since we have StringHashTable which do the similar packedFix optimization for FixedString columns. And we should use low_cardinality_key_fixed_string if possible. 
[#15034](https://github.com/ClickHouse/ClickHouse/pull/15034) ([Amos Bird](https://github.com/amosbird)). +* Fix `DateTime DateTime` mistakenly choosing the slow generic implementation. This fixes [#15153](https://github.com/ClickHouse/ClickHouse/issues/15153) . [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)). +* Use one S3 DeleteObjects request instead of multiple DeleteObject in cycle. No any functionality changes, so covered by existing tests like integration/test_log_family_s3. [#15238](https://github.com/ClickHouse/ClickHouse/pull/15238) ([ianton-ru](https://github.com/ianton-ru)). +* Faster 256-bit multiplication. [#15418](https://github.com/ClickHouse/ClickHouse/pull/15418) ([Artem Zuikov](https://github.com/4ertus2)). +* Improve `quantileTDigest` performance. This fixes [#2668](https://github.com/ClickHouse/ClickHouse/issues/2668). [#15542](https://github.com/ClickHouse/ClickHouse/pull/15542) ([Kruglov Pavel](https://github.com/Avogar)). +* Explicitly use a temporary disk to store vertical merge temporary data. [#15639](https://github.com/ClickHouse/ClickHouse/pull/15639) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). + +#### Improvement +* Add `QueryMemoryLimitExceeded` event. This closes [#14589](https://github.com/ClickHouse/ClickHouse/issues/14589). [#14647](https://github.com/ClickHouse/ClickHouse/pull/14647) ([fastio](https://github.com/fastio)). +* Fixed the backward-incompatible change by providing the options to build without debug info for functions. [#14657](https://github.com/ClickHouse/ClickHouse/pull/14657) ([Mike Kot](https://github.com/myrrc)). +* dynamic reload zookeeper config. [#14678](https://github.com/ClickHouse/ClickHouse/pull/14678) ([sundyli](https://github.com/sundy-li)). +* Allow parallel execution of distributed DDL. [#14684](https://github.com/ClickHouse/ClickHouse/pull/14684) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix potential memory leak caused by zookeeper exists watch. [#14693](https://github.com/ClickHouse/ClickHouse/pull/14693) ([hustnn](https://github.com/hustnn)). +* Fixed "Packet payload is not fully read" error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)). +* Fix crash in `bitShiftLeft()` when called with negative big integer. [#14697](https://github.com/ClickHouse/ClickHouse/pull/14697) ([Artem Zuikov](https://github.com/4ertus2)). +* Add `merge_algorithm` to system.merges table to improve merging inspections. [#14705](https://github.com/ClickHouse/ClickHouse/pull/14705) ([Amos Bird](https://github.com/amosbird)). +* Less unneeded code generated by DecimalBinaryOperation template in FunctionBinaryArithmetic. [#14743](https://github.com/ClickHouse/ClickHouse/pull/14743) ([Artem Zuikov](https://github.com/4ertus2)). +* Now columns can be used to wrap over a list of columns and apply column transformers afterwards. [#14775](https://github.com/ClickHouse/ClickHouse/pull/14775) ([Amos Bird](https://github.com/amosbird)). +* Support for disabling persistency for StorageJoin and StorageSet, this feature is controlled by setting `disable_set_and_join_persistency`. And this PR solved issue [#6318](https://github.com/ClickHouse/ClickHouse/issues/6318). [#14776](https://github.com/ClickHouse/ClickHouse/pull/14776) ([vxider](https://github.com/Vxider)). +* Construct `query_start_time` and `query_start_time_microseconds` from the same timespec. [#14831](https://github.com/ClickHouse/ClickHouse/pull/14831) ([Bharat Nallan](https://github.com/bharatnc)). +* Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). +* Show subqueries for `SET` and `JOIN` in `EXPLAIN` result.
[#14856](https://github.com/ClickHouse/ClickHouse/pull/14856) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Provide a `load_balancing_first_offset` query setting to explicitly state what the first replica is. It's used together with `FIRST_OR_RANDOM` load balancing strategy, which allows to control replicas workload. [#14867](https://github.com/ClickHouse/ClickHouse/pull/14867) ([Amos Bird](https://github.com/amosbird)). +* Fixed excessive settings constraint violation when running SELECT with SETTINGS from a distributed table. [#14876](https://github.com/ClickHouse/ClickHouse/pull/14876) ([Amos Bird](https://github.com/amosbird)). +* Allow to drop Replicated table if previous drop attempt was failed due to ZooKeeper session expiration. This fixes [#11891](https://github.com/ClickHouse/ClickHouse/issues/11891). [#14926](https://github.com/ClickHouse/ClickHouse/pull/14926) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid deadlock when executing INSERT SELECT into itself from a table with `TinyLog` or `Log` table engines. This closes [#6802](https://github.com/ClickHouse/ClickHouse/issues/6802). [#14962](https://github.com/ClickHouse/ClickHouse/pull/14962) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ignore key constraints when doing mutations. Without this pr, it's not possible to do mutations when `force_index_by_date = 1` or `force_primary_key = 1`. [#14973](https://github.com/ClickHouse/ClickHouse/pull/14973) ([Amos Bird](https://github.com/amosbird)). +* Add option to disable TTL move on data part insert. [#15000](https://github.com/ClickHouse/ClickHouse/pull/15000) ([Pavel Kovalenko](https://github.com/Jokser)). +* Enable `Atomic` database engine by default. [#15003](https://github.com/ClickHouse/ClickHouse/pull/15003) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Proper exception message for wrong number of arguments of CAST. 
This closes [#13992](https://github.com/ClickHouse/ClickHouse/issues/13992). [#15029](https://github.com/ClickHouse/ClickHouse/pull/15029) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the ability to specify specialized codecs like `Delta`, `T64`, etc. for columns with subtypes. Implements [#12551](https://github.com/ClickHouse/ClickHouse/issues/12551), fixes [#11397](https://github.com/ClickHouse/ClickHouse/issues/11397), fixes [#4609](https://github.com/ClickHouse/ClickHouse/issues/4609). [#15089](https://github.com/ClickHouse/ClickHouse/pull/15089) ([alesapin](https://github.com/alesapin)). +* Added `optimize` setting to `EXPLAIN PLAN` query. If enabled, query plan level optimisations are applied. Enabled by default. [#15201](https://github.com/ClickHouse/ClickHouse/pull/15201) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not allow connections to ClickHouse server until all scripts in `/docker-entrypoint-initdb.d/` are executed. [#15244](https://github.com/ClickHouse/ClickHouse/pull/15244) ([Aleksei Kozharin](https://github.com/alekseik1)). +* fix [15264](https://github.com/ClickHouse/ClickHouse/issues/15264). [#15285](https://github.com/ClickHouse/ClickHouse/pull/15285) ([flynn](https://github.com/ucasfl)). +* Unfold `{database}`, `{table}` and `{uuid}` macros in `zookeeper_path` on replicated table creation. Do not allow `RENAME TABLE` if it may break `zookeeper_path` after server restart. Fixes [#6917](https://github.com/ClickHouse/ClickHouse/issues/6917). [#15348](https://github.com/ClickHouse/ClickHouse/pull/15348) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add support for "Raw" column format for `Regexp` format. It allows to simply extract subpatterns as a whole without any escaping rules. [#15363](https://github.com/ClickHouse/ClickHouse/pull/15363) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). +* Wait for `DROP/DETACH TABLE` to actually finish if `NO DELAY` or `SYNC` is specified for `Atomic` database. [#15448](https://github.com/ClickHouse/ClickHouse/pull/15448) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Pass through *_for_user settings via Distributed with cluster-secure. [#15551](https://github.com/ClickHouse/ClickHouse/pull/15551) ([Azat Khuzhin](https://github.com/azat)). +* Use experimental pass manager by default. [#15608](https://github.com/ClickHouse/ClickHouse/pull/15608) ([Daniel Kutenin](https://github.com/danlark1)). +* Implement force_data_skipping_indices setting. [#15642](https://github.com/ClickHouse/ClickHouse/pull/15642) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16102](https://github.com/ClickHouse/ClickHouse/issues/16102): Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix +* Fix wrong Decimal multiplication result caused wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Stuff the query into ASTFunction's argument list so that we don't break the presumptions of some AST visitors. This fixes [#14608](https://github.com/ClickHouse/ClickHouse/issues/14608). [#14611](https://github.com/ClickHouse/ClickHouse/pull/14611) ([Amos Bird](https://github.com/amosbird)). +* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. 
Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Replace column transformer should replace identifiers with cloned ASTs. This fixes [#14695](https://github.com/ClickHouse/ClickHouse/issues/14695) . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)). +* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Fix unreleased bug for LineAsString Format. [#14842](https://github.com/ClickHouse/ClickHouse/pull/14842) ([hexiaoting](https://github.com/hexiaoting)). +* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix rare error in `SELECT` queries when the queried column has `DEFAULT` expression which depends on the other column which also has `DEFAULT` and not present in select query and not exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)). 
+* Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)). +* Fixed segfault in CacheDictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)). +* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Update jemalloc to fix possible issues with percpu arena. [#14957](https://github.com/ClickHouse/ClickHouse/pull/14957) ([Azat Khuzhin](https://github.com/azat)). 
+* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)). +* Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([Alexander Tokmakov](https://github.com/tavplubix)). +* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15829](https://github.com/ClickHouse/ClickHouse/issues/15829): Update jemalloc to fix percpu_arena with affinity mask. [#15035](https://github.com/ClickHouse/ClickHouse/pull/15035) ([Azat Khuzhin](https://github.com/azat)). +* Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`.
[#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)). +* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)). +* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hang forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. 
[#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)). +* Fixed compression in S3 storage. [#15376](https://github.com/ClickHouse/ClickHouse/pull/15376) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix multiple occurrences of column transformers in a select query. [#15378](https://github.com/ClickHouse/ClickHouse/pull/15378) ([Amos Bird](https://github.com/amosbird)). +* fixes [#15365](https://github.com/ClickHouse/ClickHouse/issues/15365) fix attach mysql database engine throw exception(no query context). [#15384](https://github.com/ClickHouse/ClickHouse/pull/15384) ([Winter Zhang](https://github.com/zhang2014)). +* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)). +* Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). 
+* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 'Database doesn't exist.' in queries with IN and Distributed table when there's no database on initiator. [#15538](https://github.com/ClickHouse/ClickHouse/pull/15538) ([Artem Zuikov](https://github.com/4ertus2)). +* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#16009](https://github.com/ClickHouse/ClickHouse/issues/16009): Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Proper error handling during insert into MergeTree with S3. [#15657](https://github.com/ClickHouse/ClickHouse/pull/15657) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#15869](https://github.com/ClickHouse/ClickHouse/issues/15869): Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race condition in AMQP-CPP. [#15667](https://github.com/ClickHouse/ClickHouse/pull/15667) ([alesapin](https://github.com/alesapin)). +* Fix rare race condition in dictionaries and tables from MySQL. [#15686](https://github.com/ClickHouse/ClickHouse/pull/15686) ([alesapin](https://github.com/alesapin)). +* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error `Cannot find column` which may happen at insertion into `MATERIALIZED VIEW` in case if query for `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#15926](https://github.com/ClickHouse/ClickHouse/issues/15926): Fix drop of materialized view with inner table in Atomic database (hangs all subsequent DROP TABLE due to hang of the worker thread, due to recursive DROP TABLE for inner table of MV). [#15743](https://github.com/ClickHouse/ClickHouse/pull/15743) ([Azat Khuzhin](https://github.com/azat)). +* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15867](https://github.com/ClickHouse/ClickHouse/issues/15867): Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15902](https://github.com/ClickHouse/ClickHouse/issues/15902): Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Backported in [#15922](https://github.com/ClickHouse/ClickHouse/issues/15922): Now exception will be thrown when `ALTER MODIFY COLUMN ... DEFAULT ...` has incompatible default with column type. Fixes [#15854](https://github.com/ClickHouse/ClickHouse/issues/15854). [#15858](https://github.com/ClickHouse/ClickHouse/pull/15858) ([alesapin](https://github.com/alesapin)). +* Backported in [#15917](https://github.com/ClickHouse/ClickHouse/issues/15917): Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#15950](https://github.com/ClickHouse/ClickHouse/issues/15950): fixes [#12513](https://github.com/ClickHouse/ClickHouse/issues/12513) fix difference expressions with same alias when analyze queries again. [#15886](https://github.com/ClickHouse/ClickHouse/pull/15886) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16169](https://github.com/ClickHouse/ClickHouse/issues/16169): Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#15969](https://github.com/ClickHouse/ClickHouse/issues/15969): Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16023](https://github.com/ClickHouse/ClickHouse/issues/16023): Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#16217](https://github.com/ClickHouse/ClickHouse/issues/16217): Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#16027](https://github.com/ClickHouse/ClickHouse/issues/16027): Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. 
[#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Backported in [#16089](https://github.com/ClickHouse/ClickHouse/issues/16089): Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16142](https://github.com/ClickHouse/ClickHouse/issues/16142): Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Backported in [#16077](https://github.com/ClickHouse/ClickHouse/issues/16077): Fixes [#15780](https://github.com/ClickHouse/ClickHouse/issues/15780) regression, e.g. indexOf([1, 2, 3], toLowCardinality(1)) now is prohibited but it should not be. [#16038](https://github.com/ClickHouse/ClickHouse/pull/16038) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#16120](https://github.com/ClickHouse/ClickHouse/issues/16120): Fix segfault in some cases of wrong aggregation in lambdas. [#16082](https://github.com/ClickHouse/ClickHouse/pull/16082) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement +* Now ClickHouse uses gcc-10 for the release build. Fixes [#11138](https://github.com/ClickHouse/ClickHouse/issues/11138). [#14609](https://github.com/ClickHouse/ClickHouse/pull/14609) ([alesapin](https://github.com/alesapin)). +* Attempt to make performance test more reliable. It is done by remapping the executable memory of the process on the fly with `madvise` to use transparent huge pages - it can lower the number of iTLB misses which is the main source of instabilities in performance tests. [#14685](https://github.com/ClickHouse/ClickHouse/pull/14685) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. 
In CMake files: - Moved some options' descriptions' parts to comments above. - Replace 0 -> `OFF`, 1 -> `ON` in `option`s default values. - Added some descriptions and links to docs to the options. - Replaced `FUZZER` option (there is another option `ENABLE_FUZZING` which also enables same functionality). - Removed `ENABLE_GTEST_LIBRARY` option as there is `ENABLE_TESTS`. [#14711](https://github.com/ClickHouse/ClickHouse/pull/14711) ([Mike Kot](https://github.com/myrrc)). +* Speed up build a little by removing unused headers. [#14714](https://github.com/ClickHouse/ClickHouse/pull/14714) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build failure in OSX. [#14761](https://github.com/ClickHouse/ClickHouse/pull/14761) ([Winter Zhang](https://github.com/zhang2014)). +* Attempt to speed up build a little. [#14808](https://github.com/ClickHouse/ClickHouse/pull/14808) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use clang-11 to build ClickHouse in CI. [#14846](https://github.com/ClickHouse/ClickHouse/pull/14846) ([alesapin](https://github.com/alesapin)). +* #14809 fix MaterializeMySQL empty transaction unstable test case found in CI. [#14854](https://github.com/ClickHouse/ClickHouse/pull/14854) ([Winter Zhang](https://github.com/zhang2014)). +* Reformat and cleanup code in all integration test *.py files. [#14864](https://github.com/ClickHouse/ClickHouse/pull/14864) ([Bharat Nallan](https://github.com/bharatnc)). +* Fixing tests/integration/test_distributed_over_live_view/test.py. [#14892](https://github.com/ClickHouse/ClickHouse/pull/14892) ([vzakaznikov](https://github.com/vzakaznikov)). +* Switch from `clang-tidy-10` to `clang-tidy-11`. [#14922](https://github.com/ClickHouse/ClickHouse/pull/14922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Convert to python3. This closes [#14886](https://github.com/ClickHouse/ClickHouse/issues/14886). 
[#15007](https://github.com/ClickHouse/ClickHouse/pull/15007) ([Azat Khuzhin](https://github.com/azat)). +* Make performance test more stable and representative by splitting test runs and profile runs. [#15027](https://github.com/ClickHouse/ClickHouse/pull/15027) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix MSan report in base64 (on servers with AVX-512). This fixes [#14006](https://github.com/ClickHouse/ClickHouse/issues/14006). [#15030](https://github.com/ClickHouse/ClickHouse/pull/15030) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't allow any C++ translation unit to build more than 10 minutes or to use more than 10 GB of memory. This fixes [#14925](https://github.com/ClickHouse/ClickHouse/issues/14925). [#15060](https://github.com/ClickHouse/ClickHouse/pull/15060) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now all test images use `llvm-symbolizer-11`. [#15069](https://github.com/ClickHouse/ClickHouse/pull/15069) ([alesapin](https://github.com/alesapin)). +* Split huge test `test_dictionaries_all_layouts_and_sources` into smaller ones. [#15110](https://github.com/ClickHouse/ClickHouse/pull/15110) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added a script to perform hardware benchmark in a single command. [#15115](https://github.com/ClickHouse/ClickHouse/pull/15115) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix CMake options forwarding in fast test script. Fixes error in [#14711](https://github.com/ClickHouse/ClickHouse/issues/14711). [#15155](https://github.com/ClickHouse/ClickHouse/pull/15155) ([alesapin](https://github.com/alesapin)). +* Improvements in CI docker images: get rid of ZooKeeper and single script for test configs installation. [#15215](https://github.com/ClickHouse/ClickHouse/pull/15215) ([alesapin](https://github.com/alesapin)). +* Now we use clang-11 for production ClickHouse build.
[#15239](https://github.com/ClickHouse/ClickHouse/pull/15239) ([alesapin](https://github.com/alesapin)). +* Allow to run AArch64 version of clickhouse-server without configs. This facilitates [#15174](https://github.com/ClickHouse/ClickHouse/issues/15174). [#15266](https://github.com/ClickHouse/ClickHouse/pull/15266) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail early in functional tests if server failed to respond. This closes [#15262](https://github.com/ClickHouse/ClickHouse/issues/15262). [#15267](https://github.com/ClickHouse/ClickHouse/pull/15267) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bug for build error: [#15272](https://github.com/ClickHouse/ClickHouse/issues/15272). [#15297](https://github.com/ClickHouse/ClickHouse/pull/15297) ([hexiaoting](https://github.com/hexiaoting)). +* Fix bug for building query_db_generator.cpp. [#15353](https://github.com/ClickHouse/ClickHouse/pull/15353) ([hexiaoting](https://github.com/hexiaoting)). +* Allow to build with llvm-11. [#15366](https://github.com/ClickHouse/ClickHouse/pull/15366) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Switch binary builds (Linux, Darwin, AArch64, FreeBSD) to clang-11. [#15622](https://github.com/ClickHouse/ClickHouse/pull/15622) ([Ilya Yatsishin](https://github.com/qoega)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump numpy from 1.19.1 to 1.19.2 in /docs/tools'. [#14733](https://github.com/ClickHouse/ClickHouse/pull/14733) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Modify the minimum version of the Clang compiler'. [#14882](https://github.com/ClickHouse/ClickHouse/pull/14882) ([Simon Liu](https://github.com/monadbobo)). +* NO CL ENTRY: 'fix a syntax error bug while using copier'. [#14890](https://github.com/ClickHouse/ClickHouse/pull/14890) ([HyaZz](https://github.com/HyaZz)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.9 to 0.4.13 in /docs/tools'. 
[#15067](https://github.com/ClickHouse/ClickHouse/pull/15067) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Revert "Test and doc for PR12771 krb5 + cyrus-sasl + kerberized kafka"'. [#15232](https://github.com/ClickHouse/ClickHouse/pull/15232) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Avoid deadlocks in Log/TinyLog"'. [#15259](https://github.com/ClickHouse/ClickHouse/pull/15259) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.13 to 0.4.17 in /docs/tools'. [#15460](https://github.com/ClickHouse/ClickHouse/pull/15460) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.10.3.30-stable.md b/docs/changelogs/v20.10.3.30-stable.md new file mode 100644 index 00000000000..29fa85a4077 --- /dev/null +++ b/docs/changelogs/v20.10.3.30-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v20.10.3.30-stable FIXME as compared to v20.10.2.20-stable + +#### Improvement +* Backported in [#16313](https://github.com/ClickHouse/ClickHouse/issues/16313): Add allow_nondeterministic_optimize_skip_unused_shards (to allow non deterministic like rand() or dictGet() in sharding key). [#16105](https://github.com/ClickHouse/ClickHouse/pull/16105) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Backported in [#16202](https://github.com/ClickHouse/ClickHouse/issues/16202): Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)). +* Backported in [#16177](https://github.com/ClickHouse/ClickHouse/issues/16177): Possibility to move part to another disk/volume if the first attempt was failed. [#15723](https://github.com/ClickHouse/ClickHouse/pull/15723) ([Pavel Kovalenko](https://github.com/Jokser)). 
+* Backported in [#16323](https://github.com/ClickHouse/ClickHouse/issues/16323): Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#16359](https://github.com/ClickHouse/ClickHouse/issues/16359): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16299](https://github.com/ClickHouse/ClickHouse/issues/16299): Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16234](https://github.com/ClickHouse/ClickHouse/issues/16234): Fix the case when memory can be overallocated regardless to the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16327](https://github.com/ClickHouse/ClickHouse/issues/16327): Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16374](https://github.com/ClickHouse/ClickHouse/issues/16374): Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. 
[#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16419](https://github.com/ClickHouse/ClickHouse/issues/16419): Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16448](https://github.com/ClickHouse/ClickHouse/issues/16448): Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v20.10.4.1-stable.md b/docs/changelogs/v20.10.4.1-stable.md new file mode 100644 index 00000000000..cfebce93df6 --- /dev/null +++ b/docs/changelogs/v20.10.4.1-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v20.10.4.1-stable FIXME as compared to v20.10.3.30-stable + +#### Improvement +* Workaround for using S3 with nginx server as proxy. Nginx currently does not accept urls with empty path like http://domain.com?delete, but vanilla aws-sdk-cpp produces this kind of urls. This commit uses patched aws-sdk-cpp version, which makes urls with "/" as path in these cases, like http://domain.com/?delete. [#16813](https://github.com/ClickHouse/ClickHouse/pull/16813) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix +* Backported in [#16492](https://github.com/ClickHouse/ClickHouse/issues/16492): Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. 
[#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#16710](https://github.com/ClickHouse/ClickHouse/issues/16710): Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#16505](https://github.com/ClickHouse/ClickHouse/issues/16505): Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16471](https://github.com/ClickHouse/ClickHouse/issues/16471): Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16572](https://github.com/ClickHouse/ClickHouse/issues/16572): Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Backported in [#16551](https://github.com/ClickHouse/ClickHouse/issues/16551): Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike Kot](https://github.com/myrrc)). 
+* Backported in [#16749](https://github.com/ClickHouse/ClickHouse/issues/16749): Fixed [#16081](https://github.com/ClickHouse/ClickHouse/issues/16081). [#16613](https://github.com/ClickHouse/ClickHouse/pull/16613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#16760](https://github.com/ClickHouse/ClickHouse/issues/16760): This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16741](https://github.com/ClickHouse/ClickHouse/issues/16741): Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16893](https://github.com/ClickHouse/ClickHouse/issues/16893): Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.10.5.10-stable.md b/docs/changelogs/v20.10.5.10-stable.md new file mode 100644 index 00000000000..bdbabe8a03b --- /dev/null +++ b/docs/changelogs/v20.10.5.10-stable.md @@ -0,0 +1,18 @@ +### ClickHouse release v20.10.5.10-stable FIXME as compared to v20.10.4.1-stable + +#### Improvement +* Backported in [#17031](https://github.com/ClickHouse/ClickHouse/issues/17031): Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. 
This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix +* Backported in [#17107](https://github.com/ClickHouse/ClickHouse/issues/17107): fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#17022](https://github.com/ClickHouse/ClickHouse/issues/17022): Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16878](https://github.com/ClickHouse/ClickHouse/issues/16878): Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#16949](https://github.com/ClickHouse/ClickHouse/issues/16949): Prevent clickhouse server crashes when using TimeSeriesGroupSum. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)). +* Backported in [#17075](https://github.com/ClickHouse/ClickHouse/issues/17075): Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17008](https://github.com/ClickHouse/ClickHouse/issues/17008): Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. 
[#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Backported in [#16967](https://github.com/ClickHouse/ClickHouse/issues/16967): Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17012](https://github.com/ClickHouse/ClickHouse/issues/17012): Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17038](https://github.com/ClickHouse/ClickHouse/issues/17038): Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Backported in [#17092](https://github.com/ClickHouse/ClickHouse/issues/17092): Fixed wrong result in big integers (128, 256 bit) when casting from double. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#17169](https://github.com/ClickHouse/ClickHouse/issues/17169): Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). 
+ diff --git a/docs/changelogs/v20.10.6.27-stable.md b/docs/changelogs/v20.10.6.27-stable.md new file mode 100644 index 00000000000..68b34411927 --- /dev/null +++ b/docs/changelogs/v20.10.6.27-stable.md @@ -0,0 +1,31 @@ +### ClickHouse release v20.10.6.27-stable FIXME as compared to v20.10.5.10-stable + +#### Performance Improvement +* Backported in [#17591](https://github.com/ClickHouse/ClickHouse/issues/17591): Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#17157](https://github.com/ClickHouse/ClickHouse/issues/17157): Fixed uncontrolled growth of TDigest. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)). +* Backported in [#17314](https://github.com/ClickHouse/ClickHouse/issues/17314): Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#17341](https://github.com/ClickHouse/ClickHouse/issues/17341): TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17587](https://github.com/ClickHouse/ClickHouse/issues/17587): Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#17594](https://github.com/ClickHouse/ClickHouse/issues/17594): Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17197](https://github.com/ClickHouse/ClickHouse/issues/17197): Avoid unnecessary network errors for remote queries which may be cancelled during execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17431](https://github.com/ClickHouse/ClickHouse/issues/17431): Bug fix for function fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#17130](https://github.com/ClickHouse/ClickHouse/issues/17130): Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()`. Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17193](https://github.com/ClickHouse/ClickHouse/issues/17193): Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088). [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17395](https://github.com/ClickHouse/ClickHouse/issues/17395): Fix [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). When clickhouse-copier handle non-partitioned table, throws segfault error. [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). 
+* Backported in [#17407](https://github.com/ClickHouse/ClickHouse/issues/17407): Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17487](https://github.com/ClickHouse/ClickHouse/issues/17487): Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17491](https://github.com/ClickHouse/ClickHouse/issues/17491): Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17524](https://github.com/ClickHouse/ClickHouse/issues/17524): Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17532](https://github.com/ClickHouse/ClickHouse/issues/17532): Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Backported in [#17626](https://github.com/ClickHouse/ClickHouse/issues/17626): Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). 
[#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Backported in [#17609](https://github.com/ClickHouse/ClickHouse/issues/17609): When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17698](https://github.com/ClickHouse/ClickHouse/issues/17698): It might be determined incorrectly if cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17730](https://github.com/ClickHouse/ClickHouse/issues/17730): Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17783](https://github.com/ClickHouse/ClickHouse/issues/17783): Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#17816](https://github.com/ClickHouse/ClickHouse/issues/17816): Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#17289](https://github.com/ClickHouse/ClickHouse/issues/17289): Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). + diff --git a/docs/changelogs/v20.10.7.4-stable.md b/docs/changelogs/v20.10.7.4-stable.md new file mode 100644 index 00000000000..ae9dd8e53fb --- /dev/null +++ b/docs/changelogs/v20.10.7.4-stable.md @@ -0,0 +1,13 @@ +### ClickHouse release v20.10.7.4-stable FIXME as compared to v20.10.6.27-stable + +#### Bug Fix +* Backported in [#17798](https://github.com/ClickHouse/ClickHouse/issues/17798): - Fix optimize_distributed_group_by_sharding_key for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18395](https://github.com/ClickHouse/ClickHouse/issues/18395): Fix empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18042](https://github.com/ClickHouse/ClickHouse/issues/18042): Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#17980](https://github.com/ClickHouse/ClickHouse/issues/17980): fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) support convert MySQL prefix index for MaterializeMySQL CC: @tavplubix. [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)). 
+* Backported in [#18079](https://github.com/ClickHouse/ClickHouse/issues/18079): Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#18178](https://github.com/ClickHouse/ClickHouse/issues/18178): Fix `Unknown setting profile` error on attempt to set settings profile. [#18167](https://github.com/ClickHouse/ClickHouse/pull/18167) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#18361](https://github.com/ClickHouse/ClickHouse/issues/18361): fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) fix unique key convert crash in MaterializeMySQL database engine. [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18292](https://github.com/ClickHouse/ClickHouse/issues/18292): Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18295](https://github.com/ClickHouse/ClickHouse/issues/18295): - Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; - Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). 
+ diff --git a/docs/changelogs/v20.11.1.5109-prestable.md b/docs/changelogs/v20.11.1.5109-prestable.md new file mode 100644 index 00000000000..0ebe0bf8b55 --- /dev/null +++ b/docs/changelogs/v20.11.1.5109-prestable.md @@ -0,0 +1,157 @@ +### ClickHouse release v20.11.1.5109-prestable FIXME as compared to v20.10.1.4881-prestable + +#### Backward Incompatible Change +* Make rankCorr function return nan on insufficient data [#16124](https://github.com/ClickHouse/ClickHouse/issues/16124). [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)). +* Aggregate functions `boundingRatio`, `rankCorr`, `retention`, `timeSeriesGroupSum`, `timeSeriesGroupRateSum`, `windowFunnel` were erroneously made case-insensitive. Now their names are made case sensitive as designed. Only functions that are specified in SQL standard or made for compatibility with other DBMS or functions similar to those should be case-insensitive. [#16407](https://github.com/ClickHouse/ClickHouse/pull/16407) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `ANALYZE` and `AST` queries, and make the setting `enable_debug_queries` obsolete since now it is the part of full featured `EXPLAIN` query. [#16536](https://github.com/ClickHouse/ClickHouse/pull/16536) ([Ivan](https://github.com/abyss7)). +* Restrict to use of non-comparable data types (like `AggregateFunction`) in keys (Sorting key, Primary key, Partition key, and so on). [#16601](https://github.com/ClickHouse/ClickHouse/pull/16601) ([alesapin](https://github.com/alesapin)). +* If some `profile` was specified in `distributed_ddl` config section, then this profile could overwrite settings of `default` profile on server startup. It's fixed, now settings of distributed DDL queries should not affect global server settings. [#16635](https://github.com/ClickHouse/ClickHouse/pull/16635) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ +#### New Feature +* #WelchTTest aggregate function implementation. [#10351](https://github.com/ClickHouse/ClickHouse/pull/10351) ([antikvist](https://github.com/antikvist)). +* New functions `encrypt`, `aes_encrypt_mysql`, `decrypt`, `aes_decrypt_mysql`. These functions are working slowly (below ClickHouse standards), so we consider it as an experimental feature. [#11844](https://github.com/ClickHouse/ClickHouse/pull/11844) ([Vasily Nemkov](https://github.com/Enmk)). +* - Added support of LDAP as a user directory for locally non-existent users. [#12736](https://github.com/ClickHouse/ClickHouse/pull/12736) ([Denis Glazachev](https://github.com/traceon)). +* Added `disable_merges` option for volumes in multi-disk configuration. [#13956](https://github.com/ClickHouse/ClickHouse/pull/13956) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Added initial OpenTelemetry support. ClickHouse now accepts OpenTelemetry traceparent headers over Native and HTTP protocols, and passes them downstream in some cases. The trace spans for executed queries are saved into the `system.opentelemetry_span_log` table. [#14195](https://github.com/ClickHouse/ClickHouse/pull/14195) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allows to read/write Single protobuf message at once (w/o length-delimiters). [#15199](https://github.com/ClickHouse/ClickHouse/pull/15199) ([filimonov](https://github.com/filimonov)). +* Add function `formatReadableTimeDelta` that format time delta to human readable string ... [#15497](https://github.com/ClickHouse/ClickHouse/pull/15497) ([Filipe Caixeta](https://github.com/filipecaixeta)). +* Add `tid` and `logTrace` function. This closes [#9434](https://github.com/ClickHouse/ClickHouse/issues/9434). [#15803](https://github.com/ClickHouse/ClickHouse/pull/15803) ([flynn](https://github.com/ucasfl)). +* Add a new option `print_query_id` to clickhouse-client. It helps generate arbitrary strings with the current query id generated by the client. 
[#15809](https://github.com/ClickHouse/ClickHouse/pull/15809) ([Amos Bird](https://github.com/amosbird)). +* Allow specify primary key in column list of CREATE TABLE query. [#15823](https://github.com/ClickHouse/ClickHouse/pull/15823) ([Maksim Kita](https://github.com/kitaisreal)). +* Added setting date_time_output_format. [#15845](https://github.com/ClickHouse/ClickHouse/pull/15845) ([Maksim Kita](https://github.com/kitaisreal)). +* Implement ``` OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES} ``` in select Query with order by. related issue:[#15367](https://github.com/ClickHouse/ClickHouse/issues/15367). [#15855](https://github.com/ClickHouse/ClickHouse/pull/15855) ([hexiaoting](https://github.com/hexiaoting)). +* Added an aggregate function, which calculates the p-value used for Welch's t-test. [#15874](https://github.com/ClickHouse/ClickHouse/pull/15874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add max_concurrent_queries_for_all_users setting, see [#6636](https://github.com/ClickHouse/ClickHouse/issues/6636) for use cases. [#16154](https://github.com/ClickHouse/ClickHouse/pull/16154) ([nvartolomei](https://github.com/nvartolomei)). +* Added minimal web UI to ClickHouse. [#16158](https://github.com/ClickHouse/ClickHouse/pull/16158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added function `untuple` which is a special function which can introduce new columns to the SELECT list by flattening a named tuple. [#16242](https://github.com/ClickHouse/ClickHouse/pull/16242) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Added toUUIDOrNull, toUUIDOrZero cast functions. [#16337](https://github.com/ClickHouse/ClickHouse/pull/16337) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `system.replicated_fetches` table which shows currently running background fetches. 
[#16428](https://github.com/ClickHouse/ClickHouse/pull/16428) ([alesapin](https://github.com/alesapin)). +* - `errorCodeToName()` function - return variable name of the error (useful for analyzing query_log and similar) - `system.errors` table - shows how many times errors have happened (respects `system_events_show_zero_values`). [#16438](https://github.com/ClickHouse/ClickHouse/pull/16438) ([Azat Khuzhin](https://github.com/azat)). +* Ability to create a docker image on the top of alpine. Uses precompiled binary and glibc components from ubuntu 20.04. [#16479](https://github.com/ClickHouse/ClickHouse/pull/16479) ([filimonov](https://github.com/filimonov)). +* Add `log_queries_min_query_duration_ms`, only queries slower than the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)). +* Add farmFingerprint64 function. [#16570](https://github.com/ClickHouse/ClickHouse/pull/16570) ([Jacob Hayes](https://github.com/JacobHayes)). +* Now we can provide identifiers via query parameters. And these parameters can be used as table objects or columns. [#3815](https://github.com/ClickHouse/ClickHouse/issues/3815). [#16594](https://github.com/ClickHouse/ClickHouse/pull/16594) ([Amos Bird](https://github.com/amosbird)). +* Added big integers (UInt256, Int128, Int256) and UUID data types support for MergeTree BloomFilter index. [#16642](https://github.com/ClickHouse/ClickHouse/pull/16642) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Speedup hashed/sparse_hashed dictionary loading by preallocating the hash table. [#15454](https://github.com/ClickHouse/ClickHouse/pull/15454) ([Azat Khuzhin](https://github.com/azat)). +* Do not merge parts across partitions in SELECT FINAL.
[#15938](https://github.com/ClickHouse/ClickHouse/pull/15938) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved performance of merges assignment in MergeTree table engines. Shouldn't be visible for the user. [#16191](https://github.com/ClickHouse/ClickHouse/pull/16191) ([alesapin](https://github.com/alesapin)). +* Improve performance of logical functions a little. [#16347](https://github.com/ClickHouse/ClickHouse/pull/16347) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of `quantileMerge`. In previous versions it was obnoxiously slow. This closes [#1463](https://github.com/ClickHouse/ClickHouse/issues/1463). [#16643](https://github.com/ClickHouse/ClickHouse/pull/16643) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of `-OrNull` and `-OrDefault` aggregate functions. [#16661](https://github.com/ClickHouse/ClickHouse/pull/16661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Allow explicitly specify columns list in `CREATE TABLE table AS table_function(...)` query. Fixes [#9249](https://github.com/ClickHouse/ClickHouse/issues/9249) Fixes [#14214](https://github.com/ClickHouse/ClickHouse/issues/14214). [#14295](https://github.com/ClickHouse/ClickHouse/pull/14295) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Now trivial count optimization becomes slightly non-trivial. Predicates that contain exact partition expr can be optimized too. This also fixes [#11092](https://github.com/ClickHouse/ClickHouse/issues/11092) which returns wrong count when `max_parallel_replicas > 1`. [#15074](https://github.com/ClickHouse/ClickHouse/pull/15074) ([Amos Bird](https://github.com/amosbird)). +* Enable parsing enum values by their ids for CSV, TSV and JSON input formats. [#15685](https://github.com/ClickHouse/ClickHouse/pull/15685) ([vivarum](https://github.com/vivarum)). +* Add reconnects to `zookeeper-dump-tree` tool. 
[#15711](https://github.com/ClickHouse/ClickHouse/pull/15711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `MemoryTrackingInBackground*` metrics to avoid potentially misleading results. This fixes [#15684](https://github.com/ClickHouse/ClickHouse/issues/15684). [#15813](https://github.com/ClickHouse/ClickHouse/pull/15813) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change level of some log messages from information to debug, so information messages will not appear for every query. This closes [#5293](https://github.com/ClickHouse/ClickHouse/issues/5293). [#15816](https://github.com/ClickHouse/ClickHouse/pull/15816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix query hang (endless loop) in case of misconfiguration (`connections_with_failover_max_tries` set to 0). [#15876](https://github.com/ClickHouse/ClickHouse/pull/15876) ([Azat Khuzhin](https://github.com/azat)). +* Added boost::program_options to `db_generator` in order to increase its usability. This closes [#15940](https://github.com/ClickHouse/ClickHouse/issues/15940). [#15973](https://github.com/ClickHouse/ClickHouse/pull/15973) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Treat `INTERVAL '1 hour'` as equivalent to `INTERVAL 1 HOUR`, to be compatible with Postgres. This fixes [#15637](https://github.com/ClickHouse/ClickHouse/issues/15637). [#15978](https://github.com/ClickHouse/ClickHouse/pull/15978) ([flynn](https://github.com/ucasfl)). +* Simplify the implementation of background tasks processing for the MergeTree table engines family. There should be no visible changes for user. [#15983](https://github.com/ClickHouse/ClickHouse/pull/15983) ([alesapin](https://github.com/alesapin)). +* Add support of cache layout for Redis dictionaries with complex key. [#15985](https://github.com/ClickHouse/ClickHouse/pull/15985) ([Anton Popov](https://github.com/CurtizJ)). 
+* Fix rare issue when clickhouse-client may abort on exit due to loading of suggestions. This fixes [#16035](https://github.com/ClickHouse/ClickHouse/issues/16035). [#16047](https://github.com/ClickHouse/ClickHouse/pull/16047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). +* - Fix memory_profiler_step/max_untracked_memory for queries via HTTP (test included), and adjusting this value globally in xml config will not help either, since those settings are not applied anyway, only default (4MB) value is [used](https://github.com/ClickHouse/ClickHouse/blob/17731245336d8c84f75e4c0894c5797ed7732190/src/Common/ThreadStatus.h#L104). - Fix query_id for the most root ThreadStatus of the http query (by initializing QueryScope after reading query_id). [#16101](https://github.com/ClickHouse/ClickHouse/pull/16101) ([Azat Khuzhin](https://github.com/azat)). +* Add allow_nondeterministic_optimize_skip_unused_shards (to allow non deterministic like rand() or dictGet() in sharding key). [#16105](https://github.com/ClickHouse/ClickHouse/pull/16105) ([Azat Khuzhin](https://github.com/azat)). +* database_atomic_wait_for_drop_and_detach_synchronously/NO DELAY/SYNC for DROP DATABASE. [#16127](https://github.com/ClickHouse/ClickHouse/pull/16127) ([Azat Khuzhin](https://github.com/azat)). +* Add support for nested data types (like named tuple) as sub-types. Fixes [#15587](https://github.com/ClickHouse/ClickHouse/issues/15587). [#16262](https://github.com/ClickHouse/ClickHouse/pull/16262) ([Ivan](https://github.com/abyss7)). +* If there is no tmp folder in the system (chroot, misconfiguration, etc.) clickhouse-local will create temporary subfolder in the current directory.
[#16280](https://github.com/ClickHouse/ClickHouse/pull/16280) ([filimonov](https://github.com/filimonov)). +* Now it's possible to specify `PRIMARY KEY` without `ORDER BY` for MergeTree table engines family. Closes [#15591](https://github.com/ClickHouse/ClickHouse/issues/15591). [#16284](https://github.com/ClickHouse/ClickHouse/pull/16284) ([alesapin](https://github.com/alesapin)). +* try use cmake version for croaring instead of amalgamation.sh. [#16285](https://github.com/ClickHouse/ClickHouse/pull/16285) ([sundyli](https://github.com/sundy-li)). +* Add total_rows/total_bytes (from system.tables) support for Set/Join table engines. [#16306](https://github.com/ClickHouse/ClickHouse/pull/16306) ([Azat Khuzhin](https://github.com/azat)). +* Better diagnostics when client has dropped connection. In previous versions, `Attempt to read after EOF` and `Broken pipe` exceptions were logged in server. In new version, it's information message `Client has dropped the connection, cancel the query.`. [#16329](https://github.com/ClickHouse/ClickHouse/pull/16329) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `TablesToDropQueueSize` metric. It's equal to number of dropped tables, that are waiting for background data removal. [#16364](https://github.com/ClickHouse/ClickHouse/pull/16364) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix debug assertion in quantileDeterministic function. In previous version it may also transfer up to two times more data over the network. Although no bug existed. This fixes [#15683](https://github.com/ClickHouse/ClickHouse/issues/15683). [#16410](https://github.com/ClickHouse/ClickHouse/pull/16410) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better read task scheduling for JBOD architecture and `MergeTree` storage. New setting `read_backoff_min_concurrency` which serves as the lower limit to the number of reading threads. 
[#16423](https://github.com/ClickHouse/ClickHouse/pull/16423) ([Amos Bird](https://github.com/amosbird)). +* Fixed bug for [#16263](https://github.com/ClickHouse/ClickHouse/issues/16263). Also minimized event loop lifetime. Added more efficient queues setup. [#16426](https://github.com/ClickHouse/ClickHouse/pull/16426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to fetch parts that are already committed or outdated in the current instance into the detached directory. It's useful when migrating tables from another cluster and having N to 1 shards mapping. It's also consistent with the current fetchPartition implementation. [#16538](https://github.com/ClickHouse/ClickHouse/pull/16538) ([Amos Bird](https://github.com/amosbird)). +* Add current_database into query_thread_log. [#16558](https://github.com/ClickHouse/ClickHouse/pull/16558) ([Azat Khuzhin](https://github.com/azat)). +* Subqueries in WITH section (CTE) can reference previous subqueries in WITH section by their name. [#16575](https://github.com/ClickHouse/ClickHouse/pull/16575) ([Amos Bird](https://github.com/amosbird)). +* - Improve scheduling of background task which removes data of dropped tables in `Atomic` databases. - `Atomic` databases do not create broken symlink to table data directory if table actually has no data directory. [#16584](https://github.com/ClickHouse/ClickHouse/pull/16584) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Now parameterized functions can be used in APPLY column transformer. [#16589](https://github.com/ClickHouse/ClickHouse/pull/16589) ([Amos Bird](https://github.com/amosbird)). +* Now `event_time_microseconds` field is stored as Decimal64, not UInt64. Removed an incorrect check from Field::get(). [#16617](https://github.com/ClickHouse/ClickHouse/pull/16617) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Apply SETTINGS clause as early as possible. It allows to modify more settings in the query.
This closes [#3178](https://github.com/ClickHouse/ClickHouse/issues/3178). [#16619](https://github.com/ClickHouse/ClickHouse/pull/16619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better update of ZooKeeper configuration in runtime. [#16630](https://github.com/ClickHouse/ClickHouse/pull/16630) ([sundyli](https://github.com/sundy-li)). +* Make the behaviour of `minMap` and `maxMap` more desirable. It will not skip zero values in the result. Fixes [#16087](https://github.com/ClickHouse/ClickHouse/issues/16087). [#16631](https://github.com/ClickHouse/ClickHouse/pull/16631) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Better diagnostics on parse errors in input data. Provide row number on `Cannot read all data` errors. [#16644](https://github.com/ClickHouse/ClickHouse/pull/16644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Update jemalloc to fix percpu_arena with affinity mask. [#15035](https://github.com/ClickHouse/ClickHouse/pull/15035) ([Azat Khuzhin](https://github.com/azat)). +* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)). +* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Possibility to move part to another disk/volume if the first attempt was failed. [#15723](https://github.com/ClickHouse/ClickHouse/pull/15723) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix drop of materialized view with inner table in Atomic database (hangs all subsequent DROP TABLE due to hang of the worker thread, due to recursive DROP TABLE for inner table of MV). [#15743](https://github.com/ClickHouse/ClickHouse/pull/15743) ([Azat Khuzhin](https://github.com/azat)). +* Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Now exception will be thrown when `ALTER MODIFY COLUMN ... DEFAULT ...` has incompatible default with column type. Fixes [#15854](https://github.com/ClickHouse/ClickHouse/issues/15854). [#15858](https://github.com/ClickHouse/ClickHouse/pull/15858) ([alesapin](https://github.com/alesapin)). +* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). +* fixes [#12513](https://github.com/ClickHouse/ClickHouse/issues/12513) fix difference expressions with same alias when analyze queries again. [#15886](https://github.com/ClickHouse/ClickHouse/pull/15886) ([Winter Zhang](https://github.com/zhang2014)). +* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). +* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. 
This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes [#15780](https://github.com/ClickHouse/ClickHouse/issues/15780) regression, e.g. indexOf([1, 2, 3], toLowCardinality(1)) now is prohibited but it should not be. [#16038](https://github.com/ClickHouse/ClickHouse/pull/16038) ([Mike Kot](https://github.com/myrrc)). +* Fix segfault in some cases of wrong aggregation in lambdas. [#16082](https://github.com/ClickHouse/ClickHouse/pull/16082) ([Anton Popov](https://github.com/CurtizJ)). +* Fix the `clickhouse-local` crash when trying to do `OPTIMIZE` command. Fixes [#16076](https://github.com/ClickHouse/ClickHouse/issues/16076). [#16192](https://github.com/ClickHouse/ClickHouse/pull/16192) ([filimonov](https://github.com/filimonov)). +* Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Fix the case when memory can be overallocated regardless to the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in `TwoLevelStringHashTable` implementation. 
[#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). +* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Specially crafted argument of `round` function with `Decimal` was leading to integer division by zero. This fixes [#13338](https://github.com/ClickHouse/ClickHouse/issues/13338). [#16451](https://github.com/ClickHouse/ClickHouse/pull/16451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike Kot](https://github.com/myrrc)). +* Fix query_thread_log.query_duration_ms unit. [#16563](https://github.com/ClickHouse/ClickHouse/pull/16563) ([Azat Khuzhin](https://github.com/azat)). +* Calculation of `DEFAULT` expressions was involving possible name collisions (that was very unlikely to encounter). This fixes [#9359](https://github.com/ClickHouse/ClickHouse/issues/9359). [#16612](https://github.com/ClickHouse/ClickHouse/pull/16612) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The setting `max_parallel_replicas` worked incorrectly if the queried table has no sampling. This fixes [#5733](https://github.com/ClickHouse/ClickHouse/issues/5733). [#16675](https://github.com/ClickHouse/ClickHouse/pull/16675) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Simplify Sys/V init script. [#14135](https://github.com/ClickHouse/ClickHouse/pull/14135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix illegal code style `&vector[idx]` in libhdfs3. This fixes libcxx debug build. See also https://github.com/ClickHouse-Extras/libhdfs3/pull/8 . [#15815](https://github.com/ClickHouse/ClickHouse/pull/15815) ([Amos Bird](https://github.com/amosbird)). +* Check for `#pragma once` in headers. [#15818](https://github.com/ClickHouse/ClickHouse/pull/15818) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Check for executable bit on non-executable files. People often accidentally commit executable files from Windows. [#15843](https://github.com/ClickHouse/ClickHouse/pull/15843) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Updated RBAC syntax tests. - Updated `INSERT` and `SELECT` RBAC tests. - New RBAC tests for `ALTER` privileges by @ritaank - New RBAC tests for `MATERIALIZED`, `LIVE`, and standard `VIEWS`. - New RBAC tests for public system tables. - New RBAC tests for `SHOW TABLES`. [#16044](https://github.com/ClickHouse/ClickHouse/pull/16044) ([MyroTk](https://github.com/MyroTk)). +* Refuse to build with AppleClang because it's difficult to find out version correspondence. This closes [#16072](https://github.com/ClickHouse/ClickHouse/issues/16072). [#16074](https://github.com/ClickHouse/ClickHouse/pull/16074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add scipy to fasttest docker. [#16093](https://github.com/ClickHouse/ClickHouse/pull/16093) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add MySQL BinLog file check tool for MaterializeMySQL database engine. [#16223](https://github.com/ClickHouse/ClickHouse/pull/16223) ([Winter Zhang](https://github.com/zhang2014)). +* Add flaky check for stateless tests. [#16238](https://github.com/ClickHouse/ClickHouse/pull/16238) ([alesapin](https://github.com/alesapin)). +* Converting test tests/queries/0_stateless/01446_json_strings_each_row to a shell script. [#16247](https://github.com/ClickHouse/ClickHouse/pull/16247) ([vzakaznikov](https://github.com/vzakaznikov)). +* - None. [#16249](https://github.com/ClickHouse/ClickHouse/pull/16249) ([Denis Glazachev](https://github.com/traceon)). +* Fixing fails in LDAP external user directory tests. [#16363](https://github.com/ClickHouse/ClickHouse/pull/16363) ([vzakaznikov](https://github.com/vzakaznikov)). +* During config removal wrong exit code was expected.
[#16365](https://github.com/ClickHouse/ClickHouse/pull/16365) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fix LDAP tests by grabbing log size after container is stopped. [#16440](https://github.com/ClickHouse/ClickHouse/pull/16440) ([vzakaznikov](https://github.com/vzakaznikov)). +* Improve generation of build files for `ya.make` build system (Arcadia). [#16700](https://github.com/ClickHouse/ClickHouse/pull/16700) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Other +* Use only |name_parts| as primary name source and auto-generate full name. [#16149](https://github.com/ClickHouse/ClickHouse/pull/16149) ([Ivan](https://github.com/abyss7)). +* Rename struct NullSink from ReadHelpers to NullOutput, because class NullSink exists in Processors/NullSink.h. It's needed to prevent redefinition of 'NullSink' error. [#16520](https://github.com/ClickHouse/ClickHouse/pull/16520) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL CATEGORY + +* Try to make MergeTreeWriteAheadLog forward compatible. [#16094](https://github.com/ClickHouse/ClickHouse/pull/16094) ([nvartolomei](https://github.com/nvartolomei)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Write structure of table functions to metadata"'. [#15961](https://github.com/ClickHouse/ClickHouse/pull/15961) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "scipy"'. [#16156](https://github.com/ClickHouse/ClickHouse/pull/16156) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Bump markdown from 3.2.1 to 3.3.2 in /docs/tools'. [#16180](https://github.com/ClickHouse/ClickHouse/pull/16180) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Revert "Optionally upload clickhouse binary in fast test"'. [#16333](https://github.com/ClickHouse/ClickHouse/pull/16333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'minor fix.'. 
[#16335](https://github.com/ClickHouse/ClickHouse/pull/16335) ([Xianda Ke](https://github.com/kexianda)). +* NO CL ENTRY: 'Bump tornado from 5.1.1 to 6.1 in /docs/tools'. [#16590](https://github.com/ClickHouse/ClickHouse/pull/16590) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.17 to 0.4.20 in /docs/tools'. [#16692](https://github.com/ClickHouse/ClickHouse/pull/16692) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.11.2.1-stable.md b/docs/changelogs/v20.11.2.1-stable.md new file mode 100644 index 00000000000..b3a71a0fdfa --- /dev/null +++ b/docs/changelogs/v20.11.2.1-stable.md @@ -0,0 +1,162 @@ +### ClickHouse release v20.11.2.1-stable FIXME as compared to v20.10.1.4881-prestable + +#### Backward Incompatible Change +* Make rankCorr function return nan on insufficient data [#16124](https://github.com/ClickHouse/ClickHouse/issues/16124). [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)). +* Aggregate functions `boundingRatio`, `rankCorr`, `retention`, `timeSeriesGroupSum`, `timeSeriesGroupRateSum`, `windowFunnel` were erroneously made case-insensitive. Now their names are made case sensitive as designed. Only functions that are specified in SQL standard or made for compatibility with other DBMS or functions similar to those should be case-insensitive. [#16407](https://github.com/ClickHouse/ClickHouse/pull/16407) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `ANALYZE` and `AST` queries, and make the setting `enable_debug_queries` obsolete since now it is the part of full featured `EXPLAIN` query. [#16536](https://github.com/ClickHouse/ClickHouse/pull/16536) ([Ivan](https://github.com/abyss7)). +* Restrict to use of non-comparable data types (like `AggregateFunction`) in keys (Sorting key, Primary key, Partition key, and so on). 
[#16601](https://github.com/ClickHouse/ClickHouse/pull/16601) ([alesapin](https://github.com/alesapin)). +* If some `profile` was specified in `distributed_ddl` config section, then this profile could overwrite settings of `default` profile on server startup. It's fixed, now settings of distributed DDL queries should not affect global server settings. [#16635](https://github.com/ClickHouse/ClickHouse/pull/16635) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### New Feature +* #WelchTTest aggregate function implementation. [#10351](https://github.com/ClickHouse/ClickHouse/pull/10351) ([antikvist](https://github.com/antikvist)). +* New functions `encrypt`, `aes_encrypt_mysql`, `decrypt`, `aes_decrypt_mysql`. These functions are working slowly (below ClickHouse standards), so we consider it as an experimental feature. [#11844](https://github.com/ClickHouse/ClickHouse/pull/11844) ([Vasily Nemkov](https://github.com/Enmk)). +* - Added support of LDAP as a user directory for locally non-existent users. [#12736](https://github.com/ClickHouse/ClickHouse/pull/12736) ([Denis Glazachev](https://github.com/traceon)). +* Added `disable_merges` option for volumes in multi-disk configuration. [#13956](https://github.com/ClickHouse/ClickHouse/pull/13956) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Added initial OpenTelemetry support. ClickHouse now accepts OpenTelemetry traceparent headers over Native and HTTP protocols, and passes them downstream in some cases. The trace spans for executed queries are saved into the `system.opentelemetry_span_log` table. [#14195](https://github.com/ClickHouse/ClickHouse/pull/14195) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allows to read/write Single protobuf message at once (w/o length-delimiters). [#15199](https://github.com/ClickHouse/ClickHouse/pull/15199) ([filimonov](https://github.com/filimonov)). +* Add function `formatReadableTimeDelta` that format time delta to human readable string ... 
[#15497](https://github.com/ClickHouse/ClickHouse/pull/15497) ([Filipe Caixeta](https://github.com/filipecaixeta)). +* Add `tid` and `logTrace` function. This closes [#9434](https://github.com/ClickHouse/ClickHouse/issues/9434). [#15803](https://github.com/ClickHouse/ClickHouse/pull/15803) ([flynn](https://github.com/ucasfl)). +* Add a new option `print_query_id` to clickhouse-client. It helps generate arbitrary strings with the current query id generated by the client. [#15809](https://github.com/ClickHouse/ClickHouse/pull/15809) ([Amos Bird](https://github.com/amosbird)). +* Allow specify primary key in column list of CREATE TABLE query. [#15823](https://github.com/ClickHouse/ClickHouse/pull/15823) ([Maksim Kita](https://github.com/kitaisreal)). +* Added setting date_time_output_format. [#15845](https://github.com/ClickHouse/ClickHouse/pull/15845) ([Maksim Kita](https://github.com/kitaisreal)). +* Implement ``` OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES} ``` in select Query with order by. related issue:[#15367](https://github.com/ClickHouse/ClickHouse/issues/15367). [#15855](https://github.com/ClickHouse/ClickHouse/pull/15855) ([hexiaoting](https://github.com/hexiaoting)). +* Added an aggregate function, which calculates the p-value used for Welch's t-test. [#15874](https://github.com/ClickHouse/ClickHouse/pull/15874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add max_concurrent_queries_for_all_users setting, see [#6636](https://github.com/ClickHouse/ClickHouse/issues/6636) for use cases. [#16154](https://github.com/ClickHouse/ClickHouse/pull/16154) ([nvartolomei](https://github.com/nvartolomei)). +* Added minimal web UI to ClickHouse. [#16158](https://github.com/ClickHouse/ClickHouse/pull/16158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Added function `untuple` which is a special function which can introduce new columns to the SELECT list by flattening a named tuple. [#16242](https://github.com/ClickHouse/ClickHouse/pull/16242) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Added toUUIDOrNull, toUUIDOrZero cast functions. [#16337](https://github.com/ClickHouse/ClickHouse/pull/16337) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `system.replicated_fetches` table which shows currently running background fetches. [#16428](https://github.com/ClickHouse/ClickHouse/pull/16428) ([alesapin](https://github.com/alesapin)). +* Add `errorCodeToName()` function, which returns the variable name of the error (useful for analyzing query_log and similar), and `system.errors` table, which shows how many times errors have happened (respects `system_events_show_zero_values`). [#16438](https://github.com/ClickHouse/ClickHouse/pull/16438) ([Azat Khuzhin](https://github.com/azat)). +* Ability to create a docker image on the top of alpine. Uses precompiled binary and glibc components from ubuntu 20.04. [#16479](https://github.com/ClickHouse/ClickHouse/pull/16479) ([filimonov](https://github.com/filimonov)). +* Add `log_queries_min_query_duration_ms`, only queries slower than the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in MySQL). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)). +* Add farmFingerprint64 function. [#16570](https://github.com/ClickHouse/ClickHouse/pull/16570) ([Jacob Hayes](https://github.com/JacobHayes)). +* Now we can provide identifiers via query parameters. And these parameters can be used as table objects or columns. [#3815](https://github.com/ClickHouse/ClickHouse/issues/3815). [#16594](https://github.com/ClickHouse/ClickHouse/pull/16594) ([Amos Bird](https://github.com/amosbird)).
+* Added big integers (UInt256, Int128, Int256) and UUID data types support for MergeTree BloomFilter index. [#16642](https://github.com/ClickHouse/ClickHouse/pull/16642) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* Speedup hashed/sparse_hashed dictionary loading by preallocating the hash table. [#15454](https://github.com/ClickHouse/ClickHouse/pull/15454) ([Azat Khuzhin](https://github.com/azat)). +* Do not merge parts across partitions in SELECT FINAL. [#15938](https://github.com/ClickHouse/ClickHouse/pull/15938) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved performance of merges assignment in MergeTree table engines. Shouldn't be visible for the user. [#16191](https://github.com/ClickHouse/ClickHouse/pull/16191) ([alesapin](https://github.com/alesapin)). +* Improve performance of logical functions a little. [#16347](https://github.com/ClickHouse/ClickHouse/pull/16347) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of `quantileMerge`. In previous versions it was obnoxiously slow. This closes [#1463](https://github.com/ClickHouse/ClickHouse/issues/1463). [#16643](https://github.com/ClickHouse/ClickHouse/pull/16643) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of `-OrNull` and `-OrDefault` aggregate functions. [#16661](https://github.com/ClickHouse/ClickHouse/pull/16661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Allow explicitly specify columns list in `CREATE TABLE table AS table_function(...)` query. Fixes [#9249](https://github.com/ClickHouse/ClickHouse/issues/9249) Fixes [#14214](https://github.com/ClickHouse/ClickHouse/issues/14214). [#14295](https://github.com/ClickHouse/ClickHouse/pull/14295) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Now trivial count optimization becomes slightly non-trivial. Predicates that contain exact partition expr can be optimized too. 
This also fixes [#11092](https://github.com/ClickHouse/ClickHouse/issues/11092) which returns wrong count when `max_parallel_replicas > 1`. [#15074](https://github.com/ClickHouse/ClickHouse/pull/15074) ([Amos Bird](https://github.com/amosbird)). +* Enable parsing enum values by their ids for CSV, TSV and JSON input formats. [#15685](https://github.com/ClickHouse/ClickHouse/pull/15685) ([vivarum](https://github.com/vivarum)). +* Add reconnects to `zookeeper-dump-tree` tool. [#15711](https://github.com/ClickHouse/ClickHouse/pull/15711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `MemoryTrackingInBackground*` metrics to avoid potentially misleading results. This fixes [#15684](https://github.com/ClickHouse/ClickHouse/issues/15684). [#15813](https://github.com/ClickHouse/ClickHouse/pull/15813) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change level of some log messages from information to debug, so information messages will not appear for every query. This closes [#5293](https://github.com/ClickHouse/ClickHouse/issues/5293). [#15816](https://github.com/ClickHouse/ClickHouse/pull/15816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix query hang (endless loop) in case of misconfiguration (`connections_with_failover_max_tries` set to 0). [#15876](https://github.com/ClickHouse/ClickHouse/pull/15876) ([Azat Khuzhin](https://github.com/azat)). +* Added boost::program_options to `db_generator` in order to increase its usability. This closes [#15940](https://github.com/ClickHouse/ClickHouse/issues/15940). [#15973](https://github.com/ClickHouse/ClickHouse/pull/15973) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Treat `INTERVAL '1 hour'` as equivalent to `INTERVAL 1 HOUR`, to be compatible with Postgres. This fixes [#15637](https://github.com/ClickHouse/ClickHouse/issues/15637). [#15978](https://github.com/ClickHouse/ClickHouse/pull/15978) ([flynn](https://github.com/ucasfl)). 
+* Add support of cache layout for Redis dictionaries with complex key. [#15985](https://github.com/ClickHouse/ClickHouse/pull/15985) ([Anton Popov](https://github.com/CurtizJ)). +* Fix rare issue when clickhouse-client may abort on exit due to loading of suggestions. This fixes [#16035](https://github.com/ClickHouse/ClickHouse/issues/16035). [#16047](https://github.com/ClickHouse/ClickHouse/pull/16047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). +* - Fix memory_profiler_step/max_untracked_memory for queries via HTTP (test included), and adjusting this value globally in xml config will not help either, since those settings are not applied anyway, only default (4MB) value is [used](https://github.com/ClickHouse/ClickHouse/blob/17731245336d8c84f75e4c0894c5797ed7732190/src/Common/ThreadStatus.h#L104). - Fix query_id for the most root ThreadStatus of the http query (by initializing QueryScope after reading query_id). [#16101](https://github.com/ClickHouse/ClickHouse/pull/16101) ([Azat Khuzhin](https://github.com/azat)). +* Add allow_nondeterministic_optimize_skip_unused_shards (to allow non-deterministic functions like rand() or dictGet() in sharding key). [#16105](https://github.com/ClickHouse/ClickHouse/pull/16105) ([Azat Khuzhin](https://github.com/azat)). +* database_atomic_wait_for_drop_and_detach_synchronously/NO DELAY/SYNC for DROP DATABASE. [#16127](https://github.com/ClickHouse/ClickHouse/pull/16127) ([Azat Khuzhin](https://github.com/azat)). +* Add support for nested data types (like named tuple) as sub-types. Fixes [#15587](https://github.com/ClickHouse/ClickHouse/issues/15587). [#16262](https://github.com/ClickHouse/ClickHouse/pull/16262) ([Ivan](https://github.com/abyss7)).
+* If there is no tmp folder in the system (chroot, misconfiguration, etc.), clickhouse-local will create a temporary subfolder in the current directory. [#16280](https://github.com/ClickHouse/ClickHouse/pull/16280) ([filimonov](https://github.com/filimonov)). +* Now it's possible to specify `PRIMARY KEY` without `ORDER BY` for MergeTree table engines family. Closes [#15591](https://github.com/ClickHouse/ClickHouse/issues/15591). [#16284](https://github.com/ClickHouse/ClickHouse/pull/16284) ([alesapin](https://github.com/alesapin)). +* Try to use the cmake version for croaring instead of amalgamation.sh. [#16285](https://github.com/ClickHouse/ClickHouse/pull/16285) ([sundyli](https://github.com/sundy-li)). +* Add total_rows/total_bytes (from system.tables) support for Set/Join table engines. [#16306](https://github.com/ClickHouse/ClickHouse/pull/16306) ([Azat Khuzhin](https://github.com/azat)). +* Better diagnostics when client has dropped connection. In previous versions, `Attempt to read after EOF` and `Broken pipe` exceptions were logged in server. In new version, it's information message `Client has dropped the connection, cancel the query.`. [#16329](https://github.com/ClickHouse/ClickHouse/pull/16329) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `TablesToDropQueueSize` metric. It's equal to number of dropped tables, that are waiting for background data removal. [#16364](https://github.com/ClickHouse/ClickHouse/pull/16364) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix debug assertion in quantileDeterministic function. In previous version it may also transfer up to two times more data over the network. Although no bug existed. This fixes [#15683](https://github.com/ClickHouse/ClickHouse/issues/15683). [#16410](https://github.com/ClickHouse/ClickHouse/pull/16410) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better read task scheduling for JBOD architecture and `MergeTree` storage.
New setting `read_backoff_min_concurrency` which serves as the lower limit to the number of reading threads. [#16423](https://github.com/ClickHouse/ClickHouse/pull/16423) ([Amos Bird](https://github.com/amosbird)). +* Fixed bug for [#16263](https://github.com/ClickHouse/ClickHouse/issues/16263). Also minimized event loop lifetime. Added more efficient queues setup. [#16426](https://github.com/ClickHouse/ClickHouse/pull/16426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to fetch parts that are already committed or outdated in the current instance into the detached directory. It's useful when migrating tables from another cluster and having N to 1 shards mapping. It's also consistent with the current fetchPartition implementation. [#16538](https://github.com/ClickHouse/ClickHouse/pull/16538) ([Amos Bird](https://github.com/amosbird)). +* Add current_database into query_thread_log. [#16558](https://github.com/ClickHouse/ClickHouse/pull/16558) ([Azat Khuzhin](https://github.com/azat)). +* Subqueries in WITH section (CTE) can reference previous subqueries in WITH section by their name. [#16575](https://github.com/ClickHouse/ClickHouse/pull/16575) ([Amos Bird](https://github.com/amosbird)). +* - Improve scheduling of background task which removes data of dropped tables in `Atomic` databases. - `Atomic` databases do not create broken symlink to table data directory if table actually has no data directory. [#16584](https://github.com/ClickHouse/ClickHouse/pull/16584) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Now parameterized functions can be used in APPLY column transformer. [#16589](https://github.com/ClickHouse/ClickHouse/pull/16589) ([Amos Bird](https://github.com/amosbird)). +* Now `event_time_microseconds` field is stored in Decimal64, not UInt64. Removed an incorrect check from Field::get(). [#16617](https://github.com/ClickHouse/ClickHouse/pull/16617) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Apply SETTINGS clause as early as possible. It allows to modify more settings in the query. This closes [#3178](https://github.com/ClickHouse/ClickHouse/issues/3178). [#16619](https://github.com/ClickHouse/ClickHouse/pull/16619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better update of ZooKeeper configuration in runtime. [#16630](https://github.com/ClickHouse/ClickHouse/pull/16630) ([sundyli](https://github.com/sundy-li)). +* Make the behaviour of `minMap` and `maxMap` more desirable. It will not skip zero values in the result. Fixes [#16087](https://github.com/ClickHouse/ClickHouse/issues/16087). [#16631](https://github.com/ClickHouse/ClickHouse/pull/16631) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Better diagnostics on parse errors in input data. Provide row number on `Cannot read all data` errors. [#16644](https://github.com/ClickHouse/ClickHouse/pull/16644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Workaround for using S3 with nginx server as proxy. Nginx currently does not accept urls with empty path like http://domain.com?delete, but vanilla aws-sdk-cpp produces this kind of urls. This commit uses patched aws-sdk-cpp version, which makes urls with "/" as path in these cases, like http://domain.com/?delete. [#16814](https://github.com/ClickHouse/ClickHouse/pull/16814) ([ianton-ru](https://github.com/ianton-ru)). + +#### Bug Fix +* Update jemalloc to fix percpu_arena with affinity mask. [#15035](https://github.com/ClickHouse/ClickHouse/pull/15035) ([Azat Khuzhin](https://github.com/azat)). +* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)). +* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration.
[#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Possibility to move part to another disk/volume if the first attempt was failed. [#15723](https://github.com/ClickHouse/ClickHouse/pull/15723) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix drop of materialized view with inner table in Atomic database (hangs all subsequent DROP TABLE due to hang of the worker thread, due to recursive DROP TABLE for inner table of MV). [#15743](https://github.com/ClickHouse/ClickHouse/pull/15743) ([Azat Khuzhin](https://github.com/azat)). +* Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Now exception will be thrown when `ALTER MODIFY COLUMN ... DEFAULT ...` has incompatible default with column type. Fixes [#15854](https://github.com/ClickHouse/ClickHouse/issues/15854). [#15858](https://github.com/ClickHouse/ClickHouse/pull/15858) ([alesapin](https://github.com/alesapin)). +* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* fixes [#12513](https://github.com/ClickHouse/ClickHouse/issues/12513) fix difference expressions with same alias when analyze queries again. [#15886](https://github.com/ClickHouse/ClickHouse/pull/15886) ([Winter Zhang](https://github.com/zhang2014)). +* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. 
[#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). +* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes [#15780](https://github.com/ClickHouse/ClickHouse/issues/15780) regression, e.g. indexOf([1, 2, 3], toLowCardinality(1)) now is prohibited but it should not be. [#16038](https://github.com/ClickHouse/ClickHouse/pull/16038) ([Mike Kot](https://github.com/myrrc)). +* Fix segfault in some cases of wrong aggregation in lambdas. [#16082](https://github.com/ClickHouse/ClickHouse/pull/16082) ([Anton Popov](https://github.com/CurtizJ)). +* Fix the `clickhouse-local` crash when trying to do `OPTIMIZE` command. Fixes [#16076](https://github.com/ClickHouse/ClickHouse/issues/16076). [#16192](https://github.com/ClickHouse/ClickHouse/pull/16192) ([filimonov](https://github.com/filimonov)). +* Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). 
[#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). +* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error.
[#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* A specifically crafted argument of `round` function with `Decimal` was leading to integer division by zero. This fixes [#13338](https://github.com/ClickHouse/ClickHouse/issues/13338). [#16451](https://github.com/ClickHouse/ClickHouse/pull/16451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike Kot](https://github.com/myrrc)). +* Fix query_thread_log.query_duration_ms unit. [#16563](https://github.com/ClickHouse/ClickHouse/pull/16563) ([Azat Khuzhin](https://github.com/azat)). +* Calculation of `DEFAULT` expressions was involving possible name collisions (that was very unlikely to encounter). This fixes [#9359](https://github.com/ClickHouse/ClickHouse/issues/9359). [#16612](https://github.com/ClickHouse/ClickHouse/pull/16612) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16750](https://github.com/ClickHouse/ClickHouse/issues/16750): Fixed [#16081](https://github.com/ClickHouse/ClickHouse/issues/16081). [#16613](https://github.com/ClickHouse/ClickHouse/pull/16613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#16758](https://github.com/ClickHouse/ClickHouse/issues/16758): This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY.
[#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* The setting `max_parallel_replicas` worked incorrectly if the queried table has no sampling. This fixes [#5733](https://github.com/ClickHouse/ClickHouse/issues/5733). [#16675](https://github.com/ClickHouse/ClickHouse/pull/16675) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16739](https://github.com/ClickHouse/ClickHouse/issues/16739): Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16784](https://github.com/ClickHouse/ClickHouse/issues/16784): Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16785](https://github.com/ClickHouse/ClickHouse/issues/16785): Not for changelog. [#16757](https://github.com/ClickHouse/ClickHouse/pull/16757) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Build/Testing/Packaging Improvement +* Simplify Sys/V init script. [#14135](https://github.com/ClickHouse/ClickHouse/pull/14135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix illegal code style `&vector[idx]` in libhdfs3. This fixes libcxx debug build. See also https://github.com/ClickHouse-Extras/libhdfs3/pull/8 . [#15815](https://github.com/ClickHouse/ClickHouse/pull/15815) ([Amos Bird](https://github.com/amosbird)). +* Check for `#pragma once` in headers. [#15818](https://github.com/ClickHouse/ClickHouse/pull/15818) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check for executable bit on non-executable files. People often accidentally commit executable files from Windows.
[#15843](https://github.com/ClickHouse/ClickHouse/pull/15843) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Updated RBAC syntax tests. - Updated `INSERT` and `SELECT` RBAC tests. - New RBAC tests for `ALTER` privileges by @ritaank - New RBAC tests for `MATERIALIZED`, `LIVE`, and standard `VIEWS`. - New RBAC tests for public system tables. - New RBAC tests for `SHOW TABLES`. [#16044](https://github.com/ClickHouse/ClickHouse/pull/16044) ([MyroTk](https://github.com/MyroTk)). +* Refuse to build with AppleClang because it's difficult to find out version correspondence. This closes [#16072](https://github.com/ClickHouse/ClickHouse/issues/16072). [#16074](https://github.com/ClickHouse/ClickHouse/pull/16074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add scipy to fasttest docker. [#16093](https://github.com/ClickHouse/ClickHouse/pull/16093) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add MySQL BinLog file check tool for MaterializeMySQL database engine. [#16223](https://github.com/ClickHouse/ClickHouse/pull/16223) ([Winter Zhang](https://github.com/zhang2014)). +* Add flaky check for stateless tests. [#16238](https://github.com/ClickHouse/ClickHouse/pull/16238) ([alesapin](https://github.com/alesapin)). +* Converting test tests/queries/0_stateless/01446_json_strings_each_row to a shell script. [#16247](https://github.com/ClickHouse/ClickHouse/pull/16247) ([vzakaznikov](https://github.com/vzakaznikov)). +* - None. [#16249](https://github.com/ClickHouse/ClickHouse/pull/16249) ([Denis Glazachev](https://github.com/traceon)). +* Fixing fails in LDAP external user directory tests. [#16363](https://github.com/ClickHouse/ClickHouse/pull/16363) ([vzakaznikov](https://github.com/vzakaznikov)). +* During config removal wrong exit code was expected. [#16365](https://github.com/ClickHouse/ClickHouse/pull/16365) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fix LDAP tests by grabbing log size after container is stopped. 
[#16440](https://github.com/ClickHouse/ClickHouse/pull/16440) ([vzakaznikov](https://github.com/vzakaznikov)). +* Improve generation of build files for `ya.make` build system (Arcadia). [#16700](https://github.com/ClickHouse/ClickHouse/pull/16700) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Other +* Use only |name_parts| as primary name source and auto-generate full name. [#16149](https://github.com/ClickHouse/ClickHouse/pull/16149) ([Ivan](https://github.com/abyss7)). +* Rename struct NullSink from ReadHelpers to NullOutput, because class NullSink exists in Processors/NullSink.h. It's needed to prevent redefinition of 'NullSink' error. [#16520](https://github.com/ClickHouse/ClickHouse/pull/16520) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL CATEGORY + +* Try to make MergeTreeWriteAheadLog forward compatible. [#16094](https://github.com/ClickHouse/ClickHouse/pull/16094) ([nvartolomei](https://github.com/nvartolomei)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Write structure of table functions to metadata"'. [#15961](https://github.com/ClickHouse/ClickHouse/pull/15961) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "scipy"'. [#16156](https://github.com/ClickHouse/ClickHouse/pull/16156) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Bump markdown from 3.2.1 to 3.3.2 in /docs/tools'. [#16180](https://github.com/ClickHouse/ClickHouse/pull/16180) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Revert "Optionally upload clickhouse binary in fast test"'. [#16333](https://github.com/ClickHouse/ClickHouse/pull/16333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'minor fix.'. [#16335](https://github.com/ClickHouse/ClickHouse/pull/16335) ([Xianda Ke](https://github.com/kexianda)). +* NO CL ENTRY: 'Bump tornado from 5.1.1 to 6.1 in /docs/tools'. 
[#16590](https://github.com/ClickHouse/ClickHouse/pull/16590) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.17 to 0.4.20 in /docs/tools'. [#16692](https://github.com/ClickHouse/ClickHouse/pull/16692) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.11.3.3-stable.md b/docs/changelogs/v20.11.3.3-stable.md new file mode 100644 index 00000000000..55126a08ec4 --- /dev/null +++ b/docs/changelogs/v20.11.3.3-stable.md @@ -0,0 +1,5 @@ +### ClickHouse release v20.11.3.3-stable FIXME as compared to v20.11.2.1-stable + +#### Bug Fix +* Backported in [#16891](https://github.com/ClickHouse/ClickHouse/issues/16891): Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.11.4.13-stable.md b/docs/changelogs/v20.11.4.13-stable.md new file mode 100644 index 00000000000..76b4b10867e --- /dev/null +++ b/docs/changelogs/v20.11.4.13-stable.md @@ -0,0 +1,20 @@ +### ClickHouse release v20.11.4.13-stable FIXME as compared to v20.11.3.3-stable + +#### Improvement +* Backported in [#17032](https://github.com/ClickHouse/ClickHouse/issues/17032): Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). 
+ +#### Bug Fix +* Backported in [#17074](https://github.com/ClickHouse/ClickHouse/issues/17074): fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#17024](https://github.com/ClickHouse/ClickHouse/issues/17024): Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16881](https://github.com/ClickHouse/ClickHouse/issues/16881): Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#16948](https://github.com/ClickHouse/ClickHouse/issues/16948): Prevent clickhouse server crashes when using TimeSeriesGroupSum. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)). +* Backported in [#17076](https://github.com/ClickHouse/ClickHouse/issues/17076): Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17010](https://github.com/ClickHouse/ClickHouse/issues/17010): Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). 
+* Backported in [#16966](https://github.com/ClickHouse/ClickHouse/issues/16966): Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17014](https://github.com/ClickHouse/ClickHouse/issues/17014): Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` has `WHERE` expression on altering column and alter hasn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17039](https://github.com/ClickHouse/ClickHouse/issues/17039): Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Backported in [#17093](https://github.com/ClickHouse/ClickHouse/issues/17093): Fixed wrong result in big integers (128, 256 bit) when casting from double. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#17127](https://github.com/ClickHouse/ClickHouse/issues/17127): Avoid unnecessary network errors for remote queries which may be cancelled during execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17132](https://github.com/ClickHouse/ClickHouse/issues/17132): Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()`. Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#17170](https://github.com/ClickHouse/ClickHouse/issues/17170): Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.11.5.18-stable.md b/docs/changelogs/v20.11.5.18-stable.md new file mode 100644 index 00000000000..88c798984ab --- /dev/null +++ b/docs/changelogs/v20.11.5.18-stable.md @@ -0,0 +1,33 @@ +### ClickHouse release v20.11.5.18-stable FIXME as compared to v20.11.4.13-stable + +#### Performance Improvement +* Backported in [#17592](https://github.com/ClickHouse/ClickHouse/issues/17592): Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#17629](https://github.com/ClickHouse/ClickHouse/issues/17629): Throw error when use ColumnTransformer replace non exist column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#17158](https://github.com/ClickHouse/ClickHouse/issues/17158): Fixed uncontrolled growth of TDigest. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)). +* Backported in [#17313](https://github.com/ClickHouse/ClickHouse/issues/17313): Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#17342](https://github.com/ClickHouse/ClickHouse/issues/17342): TODO. 
[#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17588](https://github.com/ClickHouse/ClickHouse/issues/17588): Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17595](https://github.com/ClickHouse/ClickHouse/issues/17595): Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17430](https://github.com/ClickHouse/ClickHouse/issues/17430): Bug fix for function fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#17194](https://github.com/ClickHouse/ClickHouse/issues/17194): Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17558](https://github.com/ClickHouse/ClickHouse/issues/17558): Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17396](https://github.com/ClickHouse/ClickHouse/issues/17396): Fix [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). When clickhouse-copier handles a non-partitioned table, it throws a segfault error. 
[#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). +* Backported in [#17408](https://github.com/ClickHouse/ClickHouse/issues/17408): Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17490](https://github.com/ClickHouse/ClickHouse/issues/17490): Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17494](https://github.com/ClickHouse/ClickHouse/issues/17494): Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17521](https://github.com/ClickHouse/ClickHouse/issues/17521): Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17534](https://github.com/ClickHouse/ClickHouse/issues/17534): Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#17628](https://github.com/ClickHouse/ClickHouse/issues/17628): Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Backported in [#17610](https://github.com/ClickHouse/ClickHouse/issues/17610): When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17684](https://github.com/ClickHouse/ClickHouse/issues/17684): Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17696](https://github.com/ClickHouse/ClickHouse/issues/17696): It might be determined incorrectly if cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimize_trivial_count_query with partition predicate (backport [#16767](https://github.com/ClickHouse/ClickHouse/issues/16767) to 20.11). [#17644](https://github.com/ClickHouse/ClickHouse/pull/17644) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17728](https://github.com/ClickHouse/ClickHouse/issues/17728): Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. 
Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17785](https://github.com/ClickHouse/ClickHouse/issues/17785): Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#17815](https://github.com/ClickHouse/ClickHouse/issues/17815): Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). + +#### Build/Testing/Packaging Improvement +* Backported in [#17290](https://github.com/ClickHouse/ClickHouse/issues/17290): Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). + diff --git a/docs/changelogs/v20.11.6.6-stable.md b/docs/changelogs/v20.11.6.6-stable.md new file mode 100644 index 00000000000..f6c3e5fb8d6 --- /dev/null +++ b/docs/changelogs/v20.11.6.6-stable.md @@ -0,0 +1,14 @@ +### ClickHouse release v20.11.6.6-stable FIXME as compared to v20.11.5.18-stable + +#### Bug Fix +* Backported in [#17797](https://github.com/ClickHouse/ClickHouse/issues/17797): - Fix optimize_distributed_group_by_sharding_key for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18394](https://github.com/ClickHouse/ClickHouse/issues/18394): Fix empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#18044](https://github.com/ClickHouse/ClickHouse/issues/18044): Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#18022](https://github.com/ClickHouse/ClickHouse/issues/18022): Trivial query optimization was producing wrong result if query contains ARRAY JOIN (so query is actually non trivial). [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)). +* Backported in [#17979](https://github.com/ClickHouse/ClickHouse/issues/17979): fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) support convert MySQL prefix index for MaterializeMySQL CC: @tavplubix. [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18080](https://github.com/ClickHouse/ClickHouse/issues/18080): Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#18180](https://github.com/ClickHouse/ClickHouse/issues/18180): Fix `Unknown setting profile` error on attempt to set settings profile. [#18167](https://github.com/ClickHouse/ClickHouse/pull/18167) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#18358](https://github.com/ClickHouse/ClickHouse/issues/18358): fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) fix unique key convert crash in MaterializeMySQL database engine. [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)). 
+* Backported in [#18259](https://github.com/ClickHouse/ClickHouse/issues/18259): Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18297](https://github.com/ClickHouse/ClickHouse/issues/18297): - Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; - Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). + diff --git a/docs/changelogs/v20.11.7.16-stable.md b/docs/changelogs/v20.11.7.16-stable.md new file mode 100644 index 00000000000..a4160b47556 --- /dev/null +++ b/docs/changelogs/v20.11.7.16-stable.md @@ -0,0 +1,62 @@ +### ClickHouse release v20.11.7.16-stable FIXME as compared to v20.11.6.6-stable + +#### Improvement +* Backported in [#19147](https://github.com/ClickHouse/ClickHouse/issues/19147): Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix +* Backported in [#18268](https://github.com/ClickHouse/ClickHouse/issues/18268): Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#19657](https://github.com/ClickHouse/ClickHouse/issues/19657): fix data type convert issue for mysql engine ... [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). 
+* Backported in [#18165](https://github.com/ClickHouse/ClickHouse/issues/18165): Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). +* Backported in [#19056](https://github.com/ClickHouse/ClickHouse/issues/19056): Fix inserting a row with default value in case of parsing error in the last column. Fixes [#17712](https://github.com/ClickHouse/ClickHouse/issues/17712). [#18182](https://github.com/ClickHouse/ClickHouse/pull/18182) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Backported in [#18230](https://github.com/ClickHouse/ClickHouse/issues/18230): Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#18632](https://github.com/ClickHouse/ClickHouse/issues/18632): `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#18426](https://github.com/ClickHouse/ClickHouse/issues/18426): Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#19160](https://github.com/ClickHouse/ClickHouse/issues/19160): Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18430](https://github.com/ClickHouse/ClickHouse/issues/18430): Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#18483](https://github.com/ClickHouse/ClickHouse/issues/18483): Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#18472](https://github.com/ClickHouse/ClickHouse/issues/18472): Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19718](https://github.com/ClickHouse/ClickHouse/issues/19718): Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18532](https://github.com/ClickHouse/ClickHouse/issues/18532): Proper support for 12AM in `parseDateTimeBestEffort` function. This fixes [#18402](https://github.com/ClickHouse/ClickHouse/issues/18402). 
[#18449](https://github.com/ClickHouse/ClickHouse/pull/18449) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Backported in [#18503](https://github.com/ClickHouse/ClickHouse/issues/18503): Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Backported in [#18600](https://github.com/ClickHouse/ClickHouse/issues/18600): Fix bug which may lead to `ALTER` queries hung after corresponding mutation kill. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Backported in [#18576](https://github.com/ClickHouse/ClickHouse/issues/18576): Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#18607](https://github.com/ClickHouse/ClickHouse/issues/18607): Add FixedString Data type support. I'll get this exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)" when replicating data from MySQL to ClickHouse. This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450) Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). +* Backported in [#18735](https://github.com/ClickHouse/ClickHouse/issues/18735): Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). 
+* Backported in [#18947](https://github.com/ClickHouse/ClickHouse/issues/18947): Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18801](https://github.com/ClickHouse/ClickHouse/issues/18801): Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#18838](https://github.com/ClickHouse/ClickHouse/issues/18838): Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18908](https://github.com/ClickHouse/ClickHouse/issues/18908): Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19113](https://github.com/ClickHouse/ClickHouse/issues/19113): Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Backported in [#18997](https://github.com/ClickHouse/ClickHouse/issues/18997): Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. 
Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* Backported in [#19194](https://github.com/ClickHouse/ClickHouse/issues/19194): Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19092](https://github.com/ClickHouse/ClickHouse/issues/19092): Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19050](https://github.com/ClickHouse/ClickHouse/issues/19050): Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19082](https://github.com/ClickHouse/ClickHouse/issues/19082): Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19049](https://github.com/ClickHouse/ClickHouse/issues/19049): Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#19125](https://github.com/ClickHouse/ClickHouse/issues/19125): Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#19564](https://github.com/ClickHouse/ClickHouse/issues/19564): Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19231](https://github.com/ClickHouse/ClickHouse/issues/19231): Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Backported in [#19181](https://github.com/ClickHouse/ClickHouse/issues/19181): Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19268](https://github.com/ClickHouse/ClickHouse/issues/19268): Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#19665](https://github.com/ClickHouse/ClickHouse/issues/19665): Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19555](https://github.com/ClickHouse/ClickHouse/issues/19555): Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19470](https://github.com/ClickHouse/ClickHouse/issues/19470): - Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#19439](https://github.com/ClickHouse/ClickHouse/issues/19439): Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19614](https://github.com/ClickHouse/ClickHouse/issues/19614): Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19669](https://github.com/ClickHouse/ClickHouse/issues/19669): Mark distributed batch as broken in case of empty data block in one of files. 
[#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19509](https://github.com/ClickHouse/ClickHouse/issues/19509): Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19538](https://github.com/ClickHouse/ClickHouse/issues/19538): Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19641](https://github.com/ClickHouse/ClickHouse/issues/19641): Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19637](https://github.com/ClickHouse/ClickHouse/issues/19637): `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19653](https://github.com/ClickHouse/ClickHouse/issues/19653): Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19740](https://github.com/ClickHouse/ClickHouse/issues/19740): Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). 
[#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19698](https://github.com/ClickHouse/ClickHouse/issues/19698): Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19817](https://github.com/ClickHouse/ClickHouse/issues/19817): Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19880](https://github.com/ClickHouse/ClickHouse/issues/19880): Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19812](https://github.com/ClickHouse/ClickHouse/issues/19812): In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19940](https://github.com/ClickHouse/ClickHouse/issues/19940): Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19936](https://github.com/ClickHouse/ClickHouse/issues/19936): BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). 
[#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Build/Testing/Packaging Improvement +* Backported in [#18543](https://github.com/ClickHouse/ClickHouse/issues/18543): Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.12.1.5236-prestable.md b/docs/changelogs/v20.12.1.5236-prestable.md new file mode 100644 index 00000000000..f4ecc83e176 --- /dev/null +++ b/docs/changelogs/v20.12.1.5236-prestable.md @@ -0,0 +1,98 @@ +### ClickHouse release v20.12.1.5236-prestable FIXME as compared to v20.11.1.5109-prestable + +#### Backward Incompatible Change +* Accept user settings related to file formats (e.g. `format_csv_delimiter`) in the `SETTINGS` clause when creating a table that uses `File` engine, and use these settings in all `INSERT`s and `SELECT`s. The file format settings changed in the current user session, or in the `SETTINGS` clause of a DML query itself, no longer affect the query. [#16591](https://github.com/ClickHouse/ClickHouse/pull/16591) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Enable `use_compact_format_in_distributed_parts_names` by default (see the documentation for the reference). [#16728](https://github.com/ClickHouse/ClickHouse/pull/16728) ([Azat Khuzhin](https://github.com/azat)). + +#### New Feature +* Added new ALTER UPDATE/DELETE IN PARTITION syntax. [#13403](https://github.com/ClickHouse/ClickHouse/pull/13403) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add StorageEmbeddedRocksdb Engine. [#15073](https://github.com/ClickHouse/ClickHouse/pull/15073) ([sundyli](https://github.com/sundy-li)). +* Introduce the query `ALTER TABLE ... DROP|DETACH PART 'part_name'`. [#15511](https://github.com/ClickHouse/ClickHouse/pull/15511) ([nvartolomei](https://github.com/nvartolomei)). 
+* Make it possible to change the path to history file in `clickhouse-client` using the `--history_file` parameter. [#15960](https://github.com/ClickHouse/ClickHouse/pull/15960) ([Maksim Kita](https://github.com/kitaisreal)). +* Updated DateTime, DateTime64 formatting to accept string Date literal format. [#16040](https://github.com/ClickHouse/ClickHouse/pull/16040) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting `aggregate_functions_null_for_empty`. This option will rewrite all aggregate functions in a query, adding the -OrNull suffix to them. Fixes [#10273](https://github.com/ClickHouse/ClickHouse/issues/10273). [#16123](https://github.com/ClickHouse/ClickHouse/pull/16123) ([flynn](https://github.com/ucasfl)). +* Add COLLATE support for Nullable, LowCardinality, Array and Tuple, where nested type is String. Also refactor the code associated with collations in ColumnString.cpp. [#16273](https://github.com/ClickHouse/ClickHouse/pull/16273) ([Kruglov Pavel](https://github.com/Avogar)). +* Possibility to distribute the merges between different replicas. Introduces the `execute_merges_on_single_replica_time_threshold` mergetree setting. [#16424](https://github.com/ClickHouse/ClickHouse/pull/16424) ([filimonov](https://github.com/filimonov)). +* Add `*.xz` compression/decompression support. It enables using `*.xz` in the `file()` function. This closes [#8828](https://github.com/ClickHouse/ClickHouse/issues/8828). [#16578](https://github.com/ClickHouse/ClickHouse/pull/16578) ([Abi Palagashvili](https://github.com/fibersel)). +* Add new `cmath` functions: - acosh - asinh - atan2 - atanh - cosh - hypot - log1p - sinh. [#16636](https://github.com/ClickHouse/ClickHouse/pull/16636) ([Konstantin Malanchev](https://github.com/hombit)). +* Add a possibility to input enum value as its id in TSV and CSV formats by default. [#16834](https://github.com/ClickHouse/ClickHouse/pull/16834) ([Kruglov Pavel](https://github.com/Avogar)).
+* New tcpPort() function returns the TCP port that this server listens on. [#17134](https://github.com/ClickHouse/ClickHouse/pull/17134) ([Ivan](https://github.com/abyss7)). + +#### Performance Improvement +* Now we can safely prune partitions with exact match. Useful case: suppose the table is partitioned by intHash64(x) % 100 and the query has a condition on intHash64(x) % 100 verbatim, not on x. [#16253](https://github.com/ClickHouse/ClickHouse/pull/16253) ([Amos Bird](https://github.com/amosbird)). +* Use the Floyd-Rivest algorithm; it should be the best for the ClickHouse use case of partial sorting. Benchmarks are in https://github.com/danlark1/miniselect and [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). [#16825](https://github.com/ClickHouse/ClickHouse/pull/16825) ([Daniel Kutenin](https://github.com/danlark1)). + +#### Improvement +* Add `VIEW` subquery description to `EXPLAIN`. Limit push down optimisation for `VIEW`. Add local replicas of `Distributed` to query plan. [#14936](https://github.com/ClickHouse/ClickHouse/pull/14936) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Made `indexOf()` use BloomFilter. [#14977](https://github.com/ClickHouse/ClickHouse/pull/14977) ([achimbab](https://github.com/achimbab)). +* Throw exception about right sync privileges when MySQL sync user has error privileges. [#15977](https://github.com/ClickHouse/ClickHouse/pull/15977) ([TCeason](https://github.com/TCeason)). +* Fix possible stack overflow if a loop of materialized views is created. This closes [#15732](https://github.com/ClickHouse/ClickHouse/issues/15732). [#16048](https://github.com/ClickHouse/ClickHouse/pull/16048) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support SNI in https connections to remote resources. This allows connecting to Cloudflare servers that require SNI. This fixes [#10055](https://github.com/ClickHouse/ClickHouse/issues/10055).
[#16252](https://github.com/ClickHouse/ClickHouse/pull/16252) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now the `ReplicatedMergeTree` engine family uses a separate thread pool for replicated fetches. The size of the pool is limited by the setting `background_fetches_pool_size`, which can be tuned with a server restart. The default value of the setting is 3, which means that the maximum number of parallel fetches is equal to 3 (and it allows utilizing a 10G network). Fixes #520. [#16390](https://github.com/ClickHouse/ClickHouse/pull/16390) ([alesapin](https://github.com/alesapin)). +* Now, `` configuration can be changed in `config.xml` and reloaded without server startup. [#16627](https://github.com/ClickHouse/ClickHouse/pull/16627) ([Amos Bird](https://github.com/amosbird)). +* Allow reinterpret between integers and floats of the same size. Fixes [#16640](https://github.com/ClickHouse/ClickHouse/issues/16640). [#16657](https://github.com/ClickHouse/ClickHouse/pull/16657) ([flynn](https://github.com/ucasfl)). +* Workaround for using S3 with an nginx server as a proxy. Nginx currently does not accept URLs with an empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of URL. This commit uses a patched aws-sdk-cpp version, which makes URLs with "/" as the path in these cases, like `http://domain.com/?delete`. [#16709](https://github.com/ClickHouse/ClickHouse/pull/16709) ([ianton-ru](https://github.com/ianton-ru)). +* Remove empty directories for async INSERT at start of Distributed engine. [#16729](https://github.com/ClickHouse/ClickHouse/pull/16729) ([Azat Khuzhin](https://github.com/azat)). +* Usability improvement: better suggestions in syntax error message when `CODEC` expression is misplaced in `CREATE TABLE` query. This fixes [#12493](https://github.com/ClickHouse/ClickHouse/issues/12493). [#16768](https://github.com/ClickHouse/ClickHouse/pull/16768) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Better exception message when configuration for distributed DDL is absent. This fixes [#5075](https://github.com/ClickHouse/ClickHouse/issues/5075). [#16769](https://github.com/ClickHouse/ClickHouse/pull/16769) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Apply `use_compact_format_in_distributed_parts_names` for each INSERT (with internal_replication). [#16788](https://github.com/ClickHouse/ClickHouse/pull/16788) ([Azat Khuzhin](https://github.com/azat)). +* Server refused to startup with exception message if wrong config is given (`metric_log`.`collect_interval_milliseconds` is missing). [#16815](https://github.com/ClickHouse/ClickHouse/pull/16815) ([Ivan](https://github.com/abyss7)). +* Add cutToFirstSignificantSubdomainWithWWW(). [#16845](https://github.com/ClickHouse/ClickHouse/pull/16845) ([Azat Khuzhin](https://github.com/azat)). +* Throw an informative error message when doing ATTACH/DETACH TABLE . Before this PR, `detach table ` works but leads to an ill-formed in-memory metadata. [#16885](https://github.com/ClickHouse/ClickHouse/pull/16885) ([Amos Bird](https://github.com/amosbird)). +* Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm. [#16895](https://github.com/ClickHouse/ClickHouse/pull/16895) ([Anton Popov](https://github.com/CurtizJ)). +* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). +* Set default `host` and `port` parameters for `SOURCE(CLICKHOUSE(...))` to current instance and set default `user` value to `'default'`. [#16997](https://github.com/ClickHouse/ClickHouse/pull/16997) ([Vladimir C](https://github.com/vdimir)). +* Add ability to output all rows as a JSON array in the `JSONEachRow` format, controlled by the `output_format_json_array_of_rows` setting. 
[#17152](https://github.com/ClickHouse/ClickHouse/pull/17152) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allow formatting named tuples as JSON objects when using JSON input/output formats, controlled by the `output_format_json_named_tuples_as_objects` setting, disabled by default. [#17175](https://github.com/ClickHouse/ClickHouse/pull/17175) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Correct grammar in error message in JSONEachRow, JSONCompactEachRow, and RegexpRow input formats. [#17205](https://github.com/ClickHouse/ClickHouse/pull/17205) ([nico piderman](https://github.com/sneako)). + +#### Bug Fix +* Fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574). Fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231). Fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). +* Fixed [#16081](https://github.com/ClickHouse/ClickHouse/issues/16081). [#16613](https://github.com/ClickHouse/ClickHouse/pull/16613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* Fixed uncontrolled growth of TDigest. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)). +* Turn off parallel parsing when there is not enough memory for all threads to work simultaneously. Also there could be exceptions like "Memory limit exceeded" when somebody tries to insert extremely huge rows (> min_chunk_bytes_for_parallel_parsing), because each piece to parse has to be an independent set of strings (one or more). [#16721](https://github.com/ClickHouse/ClickHouse/pull/16721) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)). +* Not for changelog. [#16757](https://github.com/ClickHouse/ClickHouse/pull/16757) ([Alexander Tokmakov](https://github.com/tavplubix)). +* If no memory can be allocated while writing table metadata on disk, broken metadata file can be written. [#16772](https://github.com/ClickHouse/ClickHouse/pull/16772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent clickhouse server crashes when using TimeSeriesGroupSum. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)). +* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. 
Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` has a `WHERE` expression on the column being altered and the alter has not finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Fixed wrong result in big integers (128, 256 bit) when casting from double. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike Kot](https://github.com/myrrc)). +* Avoid unnecessary network errors for remote queries which may be cancelled during execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* Fix LLVM's libunwind in the case when CFA register is RAX. This is the [bug](https://bugs.llvm.org/show_bug.cgi?id=48186) in [LLVM's libunwind](https://github.com/llvm/llvm-project/tree/master/libunwind). We already have workarounds for this bug. [#17046](https://github.com/ClickHouse/ClickHouse/pull/17046) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed crash on `CREATE TABLE ...
AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). +* fixes [#16923](https://github.com/ClickHouse/ClickHouse/issues/16923) fixes [#15883](https://github.com/ClickHouse/ClickHouse/issues/15883) Fix MaterializeMySQL SYNC failure when the modify MySQL binlog_checksum. [#17091](https://github.com/ClickHouse/ClickHouse/pull/17091) ([Winter Zhang](https://github.com/zhang2014)). +* Improve adaptive index granularity calculation when incoming blocks of data differ in bytes size a lot. [#17120](https://github.com/ClickHouse/ClickHouse/pull/17120) ([alesapin](https://github.com/alesapin)). +* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* fix `toInt256(inf)` stack overflow. close [#17235](https://github.com/ClickHouse/ClickHouse/issues/17235). [#17257](https://github.com/ClickHouse/ClickHouse/pull/17257) ([flynn](https://github.com/ucasfl)). + +#### Build/Testing/Packaging Improvement +* Fix UBSan report when trying to convert infinite floating point number to integer. This closes [#14190](https://github.com/ClickHouse/ClickHouse/issues/14190). [#16677](https://github.com/ClickHouse/ClickHouse/pull/16677) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in cache dictionaries. This closes [#12641](https://github.com/ClickHouse/ClickHouse/issues/12641). 
[#16763](https://github.com/ClickHouse/ClickHouse/pull/16763) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not instrument 3rd-party libraries with UBSan. [#16764](https://github.com/ClickHouse/ClickHouse/pull/16764) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in Poco. This closes [#12719](https://github.com/ClickHouse/ClickHouse/issues/12719). [#16765](https://github.com/ClickHouse/ClickHouse/pull/16765) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix query_db_generate build error. [#16859](https://github.com/ClickHouse/ClickHouse/pull/16859) ([hhhhhzhen](https://github.com/su-houzhen)). +* Try to fix fasttest submodule clone [#16132](https://github.com/ClickHouse/ClickHouse/issues/16132) https://clickhouse-test-reports.s3.yandex.net/16132/ad569f6d1bd2ce545db280daf7fbb9b8335de87b/fast_test.html#fail1. [#16908](https://github.com/ClickHouse/ClickHouse/pull/16908) ([Winter Zhang](https://github.com/zhang2014)). +* Fixing unstable test in tests/testflows/ldap/external_user_directory/tests/authentications.py. [#17161](https://github.com/ClickHouse/ClickHouse/pull/17161) ([vzakaznikov](https://github.com/vzakaznikov)). +* Bump up rocksdb version to v6.14.5. [#17179](https://github.com/ClickHouse/ClickHouse/pull/17179) ([sundyli](https://github.com/sundy-li)). +* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). +* Improvements in coverage building images. [#17233](https://github.com/ClickHouse/ClickHouse/pull/17233) ([alesapin](https://github.com/alesapin)). +* `std::logic_error` is used at line 294 of `base/common/StringRef.h`, so the appropriate `<stdexcept>` header is required. [#17256](https://github.com/ClickHouse/ClickHouse/pull/17256) ([Matwey V. Kornilov](https://github.com/matwey)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'sync MySQL DDL atomicly'.
[#16704](https://github.com/ClickHouse/ClickHouse/pull/16704) ([TCeason](https://github.com/TCeason)). +* NO CL ENTRY: 'RBAC Testflows - Server log intrumentation for debug and new ALTER tests'. [#16719](https://github.com/ClickHouse/ClickHouse/pull/16719) ([MyroTk](https://github.com/MyroTk)). +* NO CL ENTRY: 'Enabling existing testflows RBAC tests.'. [#16773](https://github.com/ClickHouse/ClickHouse/pull/16773) ([MyroTk](https://github.com/MyroTk)). +* NO CL ENTRY: 'Bump protobuf from 3.13.0 to 3.14.0 in /docs/tools'. [#17056](https://github.com/ClickHouse/ClickHouse/pull/17056) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Fixed a problem with the translation of the document'. [#17218](https://github.com/ClickHouse/ClickHouse/pull/17218) ([qianmoQ](https://github.com/qianmoQ)). + diff --git a/docs/changelogs/v20.12.2.1-stable.md b/docs/changelogs/v20.12.2.1-stable.md new file mode 100644 index 00000000000..6d8d6b151e5 --- /dev/null +++ b/docs/changelogs/v20.12.2.1-stable.md @@ -0,0 +1,127 @@ +### ClickHouse release v20.12.2.1-stable FIXME as compared to v20.11.1.5109-prestable + +#### Backward Incompatible Change +* Accept user settings related to file formats (e.g. `format_csv_delimiter`) in the `SETTINGS` clause when creating a table that uses `File` engine, and use these settings in all `INSERT`s and `SELECT`s. The file format settings changed in the current user session, or in the `SETTINGS` clause of a DML query itself, no longer affect the query. [#16591](https://github.com/ClickHouse/ClickHouse/pull/16591) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Enable `use_compact_format_in_distributed_parts_names` by default (see the documentation for the reference). [#16728](https://github.com/ClickHouse/ClickHouse/pull/16728) ([Azat Khuzhin](https://github.com/azat)). + +#### New Feature +* Added new ALTER UPDATE/DELETE IN PARTITION syntax. 
[#13403](https://github.com/ClickHouse/ClickHouse/pull/13403) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add StorageEmbeddedRocksdb Engine. [#15073](https://github.com/ClickHouse/ClickHouse/pull/15073) ([sundyli](https://github.com/sundy-li)). +* Introduce the query `ALTER TABLE ... DROP|DETACH PART 'part_name'`. [#15511](https://github.com/ClickHouse/ClickHouse/pull/15511) ([nvartolomei](https://github.com/nvartolomei)). +* Make it possible to change the path to history file in `clickhouse-client` using the `--history_file` parameter. [#15960](https://github.com/ClickHouse/ClickHouse/pull/15960) ([Maksim Kita](https://github.com/kitaisreal)). +* Updated DateTime, DateTime64 formatting to accept string Date literal format. [#16040](https://github.com/ClickHouse/ClickHouse/pull/16040) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting `aggregate_functions_null_for_empty`, this option will rewrite all aggregate functions in a query, adding -OrNull suffix to them. fix [10273](https://github.com/ClickHouse/ClickHouse/issues/10273). [#16123](https://github.com/ClickHouse/ClickHouse/pull/16123) ([flynn](https://github.com/ucasfl)). +* Add COLLATE support for Nullable, LowCardinality, Array and Tuple, where nested type is String. Also refactor the code associated with collations in ColumnString.cpp. [#16273](https://github.com/ClickHouse/ClickHouse/pull/16273) ([Kruglov Pavel](https://github.com/Avogar)). +* Possibility to distribute the merges between different replicas. Introduces the `execute_merges_on_single_replica_time_threshold` mergetree setting. [#16424](https://github.com/ClickHouse/ClickHouse/pull/16424) ([filimonov](https://github.com/filimonov)). +* add `*.xz` compression/decompression support.It enables using `*.xz` in `file()` function.This closes [#8828](https://github.com/ClickHouse/ClickHouse/issues/8828). [#16578](https://github.com/ClickHouse/ClickHouse/pull/16578) ([Abi Palagashvili](https://github.com/fibersel)). 
+* Add new `cmath` functions: - acosh - asinh - atan2 - atanh - cosh - hypot - log1p - sinh. [#16636](https://github.com/ClickHouse/ClickHouse/pull/16636) ([Konstantin Malanchev](https://github.com/hombit)). +* Add a possibility to input enum value as its id in TSV and CSV formats by default. [#16834](https://github.com/ClickHouse/ClickHouse/pull/16834) ([Kruglov Pavel](https://github.com/Avogar)). +* New tcpPort() function returns the TCP port that this server listens on. [#17134](https://github.com/ClickHouse/ClickHouse/pull/17134) ([Ivan](https://github.com/abyss7)). + +#### Performance Improvement +* Now we can safely prune partitions with exact match. Useful case: suppose the table is partitioned by intHash64(x) % 100 and the query has a condition on intHash64(x) % 100 verbatim, not on x. [#16253](https://github.com/ClickHouse/ClickHouse/pull/16253) ([Amos Bird](https://github.com/amosbird)). +* Use the Floyd-Rivest algorithm; it should be the best for the ClickHouse use case of partial sorting. Benchmarks are in https://github.com/danlark1/miniselect and [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). [#16825](https://github.com/ClickHouse/ClickHouse/pull/16825) ([Daniel Kutenin](https://github.com/danlark1)). +* Backported in [#17589](https://github.com/ClickHouse/ClickHouse/issues/17589): Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* Add `VIEW` subquery description to `EXPLAIN`. Limit push down optimisation for `VIEW`. Add local replicas of `Distributed` to query plan. [#14936](https://github.com/ClickHouse/ClickHouse/pull/14936) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Made `indexOf()` use BloomFilter.
[#14977](https://github.com/ClickHouse/ClickHouse/pull/14977) ([achimbab](https://github.com/achimbab)). +* Throw exception about right sync privileges when MySQL sync user has error privileges. [#15977](https://github.com/ClickHouse/ClickHouse/pull/15977) ([TCeason](https://github.com/TCeason)). +* Fix possible stack overflow if a loop of materialized views is created. This closes [#15732](https://github.com/ClickHouse/ClickHouse/issues/15732). [#16048](https://github.com/ClickHouse/ClickHouse/pull/16048) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support SNI in https connections to remote resources. This will allow to connect to Cloudflare servers that require SNI. This fixes [#10055](https://github.com/ClickHouse/ClickHouse/issues/10055). [#16252](https://github.com/ClickHouse/ClickHouse/pull/16252) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now `ReplicatedMergeTree` tree engines family uses a separate thread pool for replicated fetches. Size of the pool limited by setting `background_fetches_pool_size` which can be tuned with a server restart. The default value of the setting is 3 and it means that the maximum amount of parallel fetches is equal to 3 (and it allows to utilize 10G network). Fixes #520. [#16390](https://github.com/ClickHouse/ClickHouse/pull/16390) ([alesapin](https://github.com/alesapin)). +* Now, `` configuration can be changed in `config.xml` and reloaded without server startup. [#16627](https://github.com/ClickHouse/ClickHouse/pull/16627) ([Amos Bird](https://github.com/amosbird)). +* Allow reinterpret between integers and floats of the same size. fix [16640](https://github.com/ClickHouse/ClickHouse/issues/16640). [#16657](https://github.com/ClickHouse/ClickHouse/pull/16657) ([flynn](https://github.com/ucasfl)). +* Workaround for use S3 with nginx server as proxy. Nginx currenty does not accept urls with empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of urls. 
This commit uses patched aws-sdk-cpp version, which makes urls with "/" as path in this cases, like `http://domain.com/?delete`. [#16709](https://github.com/ClickHouse/ClickHouse/pull/16709) ([ianton-ru](https://github.com/ianton-ru)). +* Remove empty directories for async INSERT at start of Distributed engine. [#16729](https://github.com/ClickHouse/ClickHouse/pull/16729) ([Azat Khuzhin](https://github.com/azat)). +* Usability improvement: better suggestions in syntax error message when `CODEC` expression is misplaced in `CREATE TABLE` query. This fixes [#12493](https://github.com/ClickHouse/ClickHouse/issues/12493). [#16768](https://github.com/ClickHouse/ClickHouse/pull/16768) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better exception message when configuration for distributed DDL is absent. This fixes [#5075](https://github.com/ClickHouse/ClickHouse/issues/5075). [#16769](https://github.com/ClickHouse/ClickHouse/pull/16769) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Apply `use_compact_format_in_distributed_parts_names` for each INSERT (with internal_replication). [#16788](https://github.com/ClickHouse/ClickHouse/pull/16788) ([Azat Khuzhin](https://github.com/azat)). +* Server refused to startup with exception message if wrong config is given (`metric_log`.`collect_interval_milliseconds` is missing). [#16815](https://github.com/ClickHouse/ClickHouse/pull/16815) ([Ivan](https://github.com/abyss7)). +* Add cutToFirstSignificantSubdomainWithWWW(). [#16845](https://github.com/ClickHouse/ClickHouse/pull/16845) ([Azat Khuzhin](https://github.com/azat)). +* Throw an informative error message when doing ATTACH/DETACH TABLE . Before this PR, `detach table ` works but leads to an ill-formed in-memory metadata. [#16885](https://github.com/ClickHouse/ClickHouse/pull/16885) ([Amos Bird](https://github.com/amosbird)). +* Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm. 
[#16895](https://github.com/ClickHouse/ClickHouse/pull/16895) ([Anton Popov](https://github.com/CurtizJ)). +* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). +* Set default `host` and `port` parameters for `SOURCE(CLICKHOUSE(...))` to current instance and set default `user` value to `'default'`. [#16997](https://github.com/ClickHouse/ClickHouse/pull/16997) ([Vladimir C](https://github.com/vdimir)). +* Add ability to output all rows as a JSON array in the `JSONEachRow` format, controlled by the `output_format_json_array_of_rows` setting. [#17152](https://github.com/ClickHouse/ClickHouse/pull/17152) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allow formatting named tuples as JSON objects when using JSON input/output formats, controlled by the `output_format_json_named_tuples_as_objects` setting, disabled by default. [#17175](https://github.com/ClickHouse/ClickHouse/pull/17175) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Correct grammar in error message in JSONEachRow, JSONCompactEachRow, and RegexpRow input formats. [#17205](https://github.com/ClickHouse/ClickHouse/pull/17205) ([nico piderman](https://github.com/sneako)). + +#### Bug Fix +* Backported in [#17620](https://github.com/ClickHouse/ClickHouse/issues/17620): Throw error when use ColumnTransformer replace non exist column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)). +* fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). 
+* Fixed [#16081](https://github.com/ClickHouse/ClickHouse/issues/16081). [#16613](https://github.com/ClickHouse/ClickHouse/pull/16613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* Fixed uncontrolled growth of TDigest. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)). +* Backported in [#17315](https://github.com/ClickHouse/ClickHouse/issues/17315): Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). +* Turn off parallel parsing when there is no enough memory for all threads to work simultaneously. Also there could be exceptions like "Memory limit exceeded" when somebody will try to insert extremely huge rows (> min_chunk_bytes_for_parallel_parsing), because each piece to parse has to be independent set of strings (one or more). [#16721](https://github.com/ClickHouse/ClickHouse/pull/16721) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)). +* Not for changelog. [#16757](https://github.com/ClickHouse/ClickHouse/pull/16757) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#17520](https://github.com/ClickHouse/ClickHouse/issues/17520): Fix optimize_trivial_count_query with partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)). +* If no memory can be allocated while writing table metadata on disk, broken metadata file can be written. [#16772](https://github.com/ClickHouse/ClickHouse/pull/16772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent clickhouse server crashes when using TimeSeriesGroupSum. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)). +* Backported in [#17340](https://github.com/ClickHouse/ClickHouse/issues/17340): TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). 
[#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Backported in [#17586](https://github.com/ClickHouse/ClickHouse/issues/17586): Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17593](https://github.com/ClickHouse/ClickHouse/issues/17593): Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)). +* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Fixed wrong result in big integers (128, 256 bit) when casting from double. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#17338](https://github.com/ClickHouse/ClickHouse/issues/17338): Fix Merge(Distributed()) with JOIN. 
[#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17746](https://github.com/ClickHouse/ClickHouse/issues/17746): - Fix optimize_distributed_group_by_sharding_key for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)). +* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* Fix LLVM's libunwind in the case when CFA register is RAX. This is the [bug](https://bugs.llvm.org/show_bug.cgi?id=48186) in [LLVM's libunwind](https://github.com/llvm/llvm-project/tree/master/libunwind). We already have workarounds for this bug. [#17046](https://github.com/ClickHouse/ClickHouse/pull/17046) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17429](https://github.com/ClickHouse/ClickHouse/issues/17429): Bug fix for function fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). +* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). +* fixes [#16923](https://github.com/ClickHouse/ClickHouse/issues/16923) fixes [#15883](https://github.com/ClickHouse/ClickHouse/issues/15883) Fix MaterializeMySQL SYNC failure when the modify MySQL binlog_checksum.
[#17091](https://github.com/ClickHouse/ClickHouse/pull/17091) ([Winter Zhang](https://github.com/zhang2014)). +* Improve adaptive index granularity calculation when incoming blocks of data differ in bytes size a lot. [#17120](https://github.com/ClickHouse/ClickHouse/pull/17120) ([alesapin](https://github.com/alesapin)). +* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17560](https://github.com/ClickHouse/ClickHouse/issues/17560): Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17394](https://github.com/ClickHouse/ClickHouse/issues/17394): Fix [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). When clickhouse-copier handle non-partitioned table, throws segfault error. [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). +* Backported in [#17406](https://github.com/ClickHouse/ClickHouse/issues/17406): Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17422](https://github.com/ClickHouse/ClickHouse/issues/17422): Fix possible `Unexpected packet Data received from client` error for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)). +* fix `toInt256(inf)` stack overflow. close [#17235](https://github.com/ClickHouse/ClickHouse/issues/17235). 
[#17257](https://github.com/ClickHouse/ClickHouse/pull/17257) ([flynn](https://github.com/ucasfl)). +* Backported in [#17489](https://github.com/ClickHouse/ClickHouse/issues/17489): Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17451](https://github.com/ClickHouse/ClickHouse/issues/17451): Fixed high CPU usage in background tasks of *MergeTree tables. [#17416](https://github.com/ClickHouse/ClickHouse/pull/17416) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17493](https://github.com/ClickHouse/ClickHouse/issues/17493): Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17523](https://github.com/ClickHouse/ClickHouse/issues/17523): Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17533](https://github.com/ClickHouse/ClickHouse/issues/17533): Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Backported in [#17625](https://github.com/ClickHouse/ClickHouse/issues/17625): Fix alter query hang when the corresponding mutation was killed on the different replica. 
Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Backported in [#17611](https://github.com/ClickHouse/ClickHouse/issues/17611): Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17607](https://github.com/ClickHouse/ClickHouse/issues/17607): When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17683](https://github.com/ClickHouse/ClickHouse/issues/17683): Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17697](https://github.com/ClickHouse/ClickHouse/issues/17697): It might be determined incorrectly if cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17727](https://github.com/ClickHouse/ClickHouse/issues/17727): Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661).
[#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17784](https://github.com/ClickHouse/ClickHouse/issues/17784): Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#17780](https://github.com/ClickHouse/ClickHouse/issues/17780): Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17817](https://github.com/ClickHouse/ClickHouse/issues/17817): Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). +* Backported in [#17842](https://github.com/ClickHouse/ClickHouse/issues/17842): fix incorrect initialize `max_compress_block_size` of MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasfl)). + +#### Build/Testing/Packaging Improvement +* Fix UBSan report when trying to convert infinite floating point number to integer. This closes [#14190](https://github.com/ClickHouse/ClickHouse/issues/14190). [#16677](https://github.com/ClickHouse/ClickHouse/pull/16677) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in cache dictionaries. This closes [#12641](https://github.com/ClickHouse/ClickHouse/issues/12641). [#16763](https://github.com/ClickHouse/ClickHouse/pull/16763) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not instrument 3rd-party libraries with UBSan. [#16764](https://github.com/ClickHouse/ClickHouse/pull/16764) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in Poco. 
This closes [#12719](https://github.com/ClickHouse/ClickHouse/issues/12719). [#16765](https://github.com/ClickHouse/ClickHouse/pull/16765) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix query_db_generate build error. [#16859](https://github.com/ClickHouse/ClickHouse/pull/16859) ([hhhhhzhen](https://github.com/su-houzhen)). +* Try fix fasttest submodule clone [#16132](https://github.com/ClickHouse/ClickHouse/issues/16132) https://clickhouse-test-reports.s3.yandex.net/16132/ad569f6d1bd2ce545db280daf7fbb9b8335de87b/fast_test.html#fail1. [#16908](https://github.com/ClickHouse/ClickHouse/pull/16908) ([Winter Zhang](https://github.com/zhang2014)). +* Fixing unstable test in tests/testflows/ldap/external_user_directory/tests/authentications.py. [#17161](https://github.com/ClickHouse/ClickHouse/pull/17161) ([vzakaznikov](https://github.com/vzakaznikov)). +* bump up rocksdb version to v6.14.5. [#17179](https://github.com/ClickHouse/ClickHouse/pull/17179) ([sundyli](https://github.com/sundy-li)). +* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). +* Improvements in coverage building images. [#17233](https://github.com/ClickHouse/ClickHouse/pull/17233) ([alesapin](https://github.com/alesapin)). +* `std::logic_error` is used at line 294 of `base/common/StringRef.h`, so the appropriate `<stdexcept>` header is required. [#17256](https://github.com/ClickHouse/ClickHouse/pull/17256) ([Matwey V. Kornilov](https://github.com/matwey)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'sync MySQL DDL atomicly'. [#16704](https://github.com/ClickHouse/ClickHouse/pull/16704) ([TCeason](https://github.com/TCeason)). +* NO CL ENTRY: 'RBAC Testflows - Server log intrumentation for debug and new ALTER tests'. [#16719](https://github.com/ClickHouse/ClickHouse/pull/16719) ([MyroTk](https://github.com/MyroTk)).
+* NO CL ENTRY: 'Enabling existing testflows RBAC tests.'. [#16773](https://github.com/ClickHouse/ClickHouse/pull/16773) ([MyroTk](https://github.com/MyroTk)). +* NO CL ENTRY: 'Bump protobuf from 3.13.0 to 3.14.0 in /docs/tools'. [#17056](https://github.com/ClickHouse/ClickHouse/pull/17056) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Fixed a problem with the translation of the document'. [#17218](https://github.com/ClickHouse/ClickHouse/pull/17218) ([qianmoQ](https://github.com/qianmoQ)). + diff --git a/docs/changelogs/v20.12.3.3-stable.md b/docs/changelogs/v20.12.3.3-stable.md new file mode 100644 index 00000000000..9056ddd8b37 --- /dev/null +++ b/docs/changelogs/v20.12.3.3-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v20.12.3.3-stable FIXME as compared to v20.12.2.1-stable + diff --git a/docs/changelogs/v20.12.4.5-stable.md b/docs/changelogs/v20.12.4.5-stable.md new file mode 100644 index 00000000000..b28c5462c9d --- /dev/null +++ b/docs/changelogs/v20.12.4.5-stable.md @@ -0,0 +1,14 @@ +### ClickHouse release v20.12.4.5-stable FIXME as compared to v20.12.3.3-stable + +#### Bug Fix +* Backported in [#18392](https://github.com/ClickHouse/ClickHouse/issues/18392): Fix empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18043](https://github.com/ClickHouse/ClickHouse/issues/18043): Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#18111](https://github.com/ClickHouse/ClickHouse/issues/18111): Fix max_distributed_connections (affects `prefer_localhost_replica=1` and `max_threads!=max_distributed_connections`). 
[#17848](https://github.com/ClickHouse/ClickHouse/pull/17848) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18021](https://github.com/ClickHouse/ClickHouse/issues/18021): Trivial query optimization was producing wrong result if query contains ARRAY JOIN (so query is actually non trivial). [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)). +* Backported in [#17981](https://github.com/ClickHouse/ClickHouse/issues/17981): fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) support convert MySQL prefix index for MaterializeMySQL CC: @tavplubix. [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18078](https://github.com/ClickHouse/ClickHouse/issues/18078): Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#18179](https://github.com/ClickHouse/ClickHouse/issues/18179): Fix `Unknown setting profile` error on attempt to set settings profile. [#18167](https://github.com/ClickHouse/ClickHouse/pull/18167) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#18359](https://github.com/ClickHouse/ClickHouse/issues/18359): fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) fix unique key convert crash in MaterializeMySQL database engine. [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18258](https://github.com/ClickHouse/ClickHouse/issues/18258): Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). 
[#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#18296](https://github.com/ClickHouse/ClickHouse/issues/18296): - Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; - Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). + diff --git a/docs/changelogs/v20.12.5.14-stable.md b/docs/changelogs/v20.12.5.14-stable.md new file mode 100644 index 00000000000..04d5153a967 --- /dev/null +++ b/docs/changelogs/v20.12.5.14-stable.md @@ -0,0 +1,14 @@ +### ClickHouse release v20.12.5.14-stable FIXME as compared to v20.12.4.5-stable + +#### Bug Fix +* Backported in [#18166](https://github.com/ClickHouse/ClickHouse/issues/18166): Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). +* Backported in [#18424](https://github.com/ClickHouse/ClickHouse/issues/18424): Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#18428](https://github.com/ClickHouse/ClickHouse/issues/18428): Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Backported in [#18482](https://github.com/ClickHouse/ClickHouse/issues/18482): Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#18471](https://github.com/ClickHouse/ClickHouse/issues/18471): Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#18533](https://github.com/ClickHouse/ClickHouse/issues/18533): Proper support for 12AM in `parseDateTimeBestEffort` function. This fixes [#18402](https://github.com/ClickHouse/ClickHouse/issues/18402). [#18449](https://github.com/ClickHouse/ClickHouse/pull/18449) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Backported in [#18502](https://github.com/ClickHouse/ClickHouse/issues/18502): Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement +* Backported in [#18546](https://github.com/ClickHouse/ClickHouse/issues/18546): Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). 
+ diff --git a/docs/changelogs/v20.12.5.18-stable.md b/docs/changelogs/v20.12.5.18-stable.md new file mode 100644 index 00000000000..ec9c79b5f4f --- /dev/null +++ b/docs/changelogs/v20.12.5.18-stable.md @@ -0,0 +1,57 @@ +### ClickHouse release v20.12.5.18-stable FIXME as compared to v20.12.5.14-stable + +#### Improvement +* Backported in [#19149](https://github.com/ClickHouse/ClickHouse/issues/19149): Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix +* Backported in [#18267](https://github.com/ClickHouse/ClickHouse/issues/18267): Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#19655](https://github.com/ClickHouse/ClickHouse/issues/19655): fix data type convert issue for mysql engine ... [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). +* Backported in [#18311](https://github.com/ClickHouse/ClickHouse/issues/18311): Fix inserting a row with default value in case of parsing error in the last column. Fixes [#17712](https://github.com/ClickHouse/ClickHouse/issues/17712). [#18182](https://github.com/ClickHouse/ClickHouse/pull/18182) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Backported in [#18229](https://github.com/ClickHouse/ClickHouse/issues/18229): Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). 
[#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#18630](https://github.com/ClickHouse/ClickHouse/issues/18630): `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#19202](https://github.com/ClickHouse/ClickHouse/issues/19202): `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#19162](https://github.com/ClickHouse/ClickHouse/issues/19162): Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#19716](https://github.com/ClickHouse/ClickHouse/issues/19716): Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18601](https://github.com/ClickHouse/ClickHouse/issues/18601): Fix bug which may lead to `ALTER` queries hung after corresponding mutation kill. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Backported in [#18577](https://github.com/ClickHouse/ClickHouse/issues/18577): Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. 
[#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#18606](https://github.com/ClickHouse/ClickHouse/issues/18606): Add FixedString Data type support. I'll get this exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)" when replicating data from MySQL to ClickHouse. This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450) Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). +* Backported in [#18643](https://github.com/ClickHouse/ClickHouse/issues/18643): Fix removing of empty parts in `ReplicatedMergeTree` tables, created with old syntax. Fixes [#18582](https://github.com/ClickHouse/ClickHouse/issues/18582). [#18614](https://github.com/ClickHouse/ClickHouse/pull/18614) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#18738](https://github.com/ClickHouse/ClickHouse/issues/18738): Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). +* Backported in [#18949](https://github.com/ClickHouse/ClickHouse/issues/18949): Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#18802](https://github.com/ClickHouse/ClickHouse/issues/18802): Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). 
[#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#18835](https://github.com/ClickHouse/ClickHouse/issues/18835): Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#18909](https://github.com/ClickHouse/ClickHouse/issues/18909): Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19114](https://github.com/ClickHouse/ClickHouse/issues/19114): Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Backported in [#18967](https://github.com/ClickHouse/ClickHouse/issues/18967): Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* Backported in [#19008](https://github.com/ClickHouse/ClickHouse/issues/19008): Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Backported in [#19192](https://github.com/ClickHouse/ClickHouse/issues/19192): Fixed very rare deadlock at shutdown. 
[#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19094](https://github.com/ClickHouse/ClickHouse/issues/19094): Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19068](https://github.com/ClickHouse/ClickHouse/issues/19068): Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#19051](https://github.com/ClickHouse/ClickHouse/issues/19051): Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19083](https://github.com/ClickHouse/ClickHouse/issues/19083): Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19047](https://github.com/ClickHouse/ClickHouse/issues/19047): Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#19128](https://github.com/ClickHouse/ClickHouse/issues/19128): Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#19565](https://github.com/ClickHouse/ClickHouse/issues/19565): Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19232](https://github.com/ClickHouse/ClickHouse/issues/19232): Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Backported in [#19180](https://github.com/ClickHouse/ClickHouse/issues/19180): Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19269](https://github.com/ClickHouse/ClickHouse/issues/19269): Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#19424](https://github.com/ClickHouse/ClickHouse/issues/19424): Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19553](https://github.com/ClickHouse/ClickHouse/issues/19553): Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19469](https://github.com/ClickHouse/ClickHouse/issues/19469): - Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#19437](https://github.com/ClickHouse/ClickHouse/issues/19437): Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19612](https://github.com/ClickHouse/ClickHouse/issues/19612): Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19664](https://github.com/ClickHouse/ClickHouse/issues/19664): Mark distributed batch as broken in case of empty data block in one of files. 
[#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19508](https://github.com/ClickHouse/ClickHouse/issues/19508): Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19541](https://github.com/ClickHouse/ClickHouse/issues/19541): Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19642](https://github.com/ClickHouse/ClickHouse/issues/19642): Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19638](https://github.com/ClickHouse/ClickHouse/issues/19638): `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19651](https://github.com/ClickHouse/ClickHouse/issues/19651): Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#19741](https://github.com/ClickHouse/ClickHouse/issues/19741): Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). 
[#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19699](https://github.com/ClickHouse/ClickHouse/issues/19699): Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19818](https://github.com/ClickHouse/ClickHouse/issues/19818): Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#19781](https://github.com/ClickHouse/ClickHouse/issues/19781): Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#19871](https://github.com/ClickHouse/ClickHouse/issues/19871): Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#19811](https://github.com/ClickHouse/ClickHouse/issues/19811): In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19941](https://github.com/ClickHouse/ClickHouse/issues/19941): Deadlock was possible if system.text_log is enabled. 
This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#19937](https://github.com/ClickHouse/ClickHouse/issues/19937): BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). + diff --git a/docs/changelogs/v20.12.6.29-stable.md b/docs/changelogs/v20.12.6.29-stable.md new file mode 100644 index 00000000000..ac4b27e5ade --- /dev/null +++ b/docs/changelogs/v20.12.6.29-stable.md @@ -0,0 +1,18 @@ +### ClickHouse release v20.12.6.29-stable FIXME as compared to v20.12.5.18-stable + +#### Bug Fix +* Backported in [#19984](https://github.com/ClickHouse/ClickHouse/issues/19984): Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Backported in [#20240](https://github.com/ClickHouse/ClickHouse/issues/20240): Fix a bug that moving pieces to destination table may fail in case of launching multiple clickhouse-copiers. [#19743](https://github.com/ClickHouse/ClickHouse/pull/19743) ([madianjun](https://github.com/mdianjun)). +* Backported in [#20080](https://github.com/ClickHouse/ClickHouse/issues/20080): Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#20298](https://github.com/ClickHouse/ClickHouse/issues/20298): Bugfix in StorageJoin. [#20079](https://github.com/ClickHouse/ClickHouse/pull/20079) ([Vladimir C](https://github.com/vdimir)). 
+* Backported in [#20391](https://github.com/ClickHouse/ClickHouse/issues/20391): The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#20328](https://github.com/ClickHouse/ClickHouse/issues/20328): Fix rare server crash on config reload during the shutdown. Fixes [#19689](https://github.com/ClickHouse/ClickHouse/issues/19689). [#20224](https://github.com/ClickHouse/ClickHouse/pull/20224) ([alesapin](https://github.com/alesapin)). +* Backported in [#20332](https://github.com/ClickHouse/ClickHouse/issues/20332): Restrict to `DROP` or `RENAME` version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). +* Backported in [#20364](https://github.com/ClickHouse/ClickHouse/issues/20364): Fix too often retries of failed background tasks for `ReplicatedMergeTree` table engines family. This could lead to too verbose logging and increased CPU load. Fixes [#20203](https://github.com/ClickHouse/ClickHouse/issues/20203). [#20335](https://github.com/ClickHouse/ClickHouse/pull/20335) ([alesapin](https://github.com/alesapin)). +* Backported in [#20377](https://github.com/ClickHouse/ClickHouse/issues/20377): Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20360](https://github.com/ClickHouse/ClickHouse/issues/20360): Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). 
+ +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#20224](https://github.com/ClickHouse/ClickHouse/issues/20224) to 20.12: Fix access control manager destruction order"'. [#20396](https://github.com/ClickHouse/ClickHouse/pull/20396) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.12.7.3-stable.md b/docs/changelogs/v20.12.7.3-stable.md new file mode 100644 index 00000000000..676ab4f7d11 --- /dev/null +++ b/docs/changelogs/v20.12.7.3-stable.md @@ -0,0 +1,9 @@ +### ClickHouse release v20.12.7.3-stable FIXME as compared to v20.12.6.29-stable + +#### Bug Fix +* Backported in [#20683](https://github.com/ClickHouse/ClickHouse/issues/20683): Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20635](https://github.com/ClickHouse/ClickHouse/issues/20635): Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Backported in [#20617](https://github.com/ClickHouse/ClickHouse/issues/20617): Check if table function `view` is used in expression list and throw an error. This fixes [#20342](https://github.com/ClickHouse/ClickHouse/issues/20342). [#20350](https://github.com/ClickHouse/ClickHouse/pull/20350) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20487](https://github.com/ClickHouse/ClickHouse/issues/20487): Fix `LOGICAL_ERROR` for `join_use_nulls=1` when JOIN contains const from SELECT. [#20461](https://github.com/ClickHouse/ClickHouse/pull/20461) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#20614](https://github.com/ClickHouse/ClickHouse/issues/20614): Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV). [#20498](https://github.com/ClickHouse/ClickHouse/pull/20498) ([Azat Khuzhin](https://github.com/azat)). + diff --git a/docs/changelogs/v20.12.8.5-stable.md b/docs/changelogs/v20.12.8.5-stable.md new file mode 100644 index 00000000000..48e20c46928 --- /dev/null +++ b/docs/changelogs/v20.12.8.5-stable.md @@ -0,0 +1,9 @@ +### ClickHouse release v20.12.8.5-stable FIXME as compared to v20.12.7.3-stable + +#### Bug Fix +* Backported in [#20592](https://github.com/ClickHouse/ClickHouse/issues/20592): Fixed inconsistent behavior of dictionary in case of queries where we look for absent keys in dictionary. [#20578](https://github.com/ClickHouse/ClickHouse/pull/20578) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#21048](https://github.com/ClickHouse/ClickHouse/issues/21048): Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#21133](https://github.com/ClickHouse/ClickHouse/issues/21133): Fixed behaviour, when `ALTER MODIFY COLUMN` created mutation, that will knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#21249](https://github.com/ClickHouse/ClickHouse/issues/21249): - Block parallel insertions into storage join. [#21009](https://github.com/ClickHouse/ClickHouse/pull/21009) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#21230](https://github.com/ClickHouse/ClickHouse/issues/21230): Fixes [#21112](https://github.com/ClickHouse/ClickHouse/issues/21112). Fixed bug that could cause duplicates with insert query (if one of the callbacks came a little too late). 
[#21138](https://github.com/ClickHouse/ClickHouse/pull/21138) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v20.5.1.3833-prestable.md b/docs/changelogs/v20.5.1.3833-prestable.md new file mode 100644 index 00000000000..824fb051914 --- /dev/null +++ b/docs/changelogs/v20.5.1.3833-prestable.md @@ -0,0 +1,380 @@ +### ClickHouse release v20.5.1.3833-prestable FIXME as compared to v20.4.1.3177-prestable + +#### Backward Incompatible Change +* Remove `experimental_use_processors` setting. It is enabled by default. [#10924](https://github.com/ClickHouse/ClickHouse/pull/10924) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The setting `input_format_with_names_use_header` is enabled by default. It will affect parsing of input formats `-WithNames` and `-WithNamesAndTypes`. [#10937](https://github.com/ClickHouse/ClickHouse/pull/10937) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added a check for the case when user-level setting is specified in a wrong place. User-level settings should be specified in `users.xml` inside `<profile>` section for specific user profile (or in `<default>` for default settings). The server won't start with exception message in log. This fixes [#9051](https://github.com/ClickHouse/ClickHouse/issues/9051). If you want to skip the check, you can either move settings to the appropriate place or add `<skip_check_for_incorrect_settings>1</skip_check_for_incorrect_settings>` to config.xml. [#11449](https://github.com/ClickHouse/ClickHouse/pull/11449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Return non-Nullable result from COUNT(DISTINCT), and `uniq` aggregate functions family. If all passed values are NULL, return zero instead. This improves SQL compatibility. [#11661](https://github.com/ClickHouse/ClickHouse/pull/11661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Support Cassandra as external dictionary. [#4978](https://github.com/ClickHouse/ClickHouse/pull/4978) ([favstovol](https://github.com/favstovol)). 
+* `SimpleAggregateFunction` now also supports `sumMap`. [#10000](https://github.com/ClickHouse/ClickHouse/pull/10000) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added OpenCl support and bitonic sort algorithm, which can be used for sorting integer types of data in single column. Needs to be built with flag `-DENABLE_OPENCL=1`. For using bitonic sort algorithm instead of others you need to set `bitonic_sort` for Setting's option `special_sort` and make sure that OpenCL is available. [#10232](https://github.com/ClickHouse/ClickHouse/pull/10232) ([Margarita Konnova [MARK]](https://github.com/margaritiko)). +* Implementation of PostgreSQL wire protocol. [#10242](https://github.com/ClickHouse/ClickHouse/pull/10242) ([Movses Elbakian](https://github.com/MovElb)). +* Selects with final are executed in parallel. Added setting `max_final_threads` to limit the number of threads used. [#10463](https://github.com/ClickHouse/ClickHouse/pull/10463) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Function that extracts from haystack all matching non-overlapping groups with regular expressions, and put those into `Array(Array(String))` column. [#10534](https://github.com/ClickHouse/ClickHouse/pull/10534) ([Vasily Nemkov](https://github.com/Enmk)). +* Added ability to delete a subset of expired rows, which satisfies the condition in WHERE clause. Added ability to replace expired rows with aggregates of them specified in GROUP BY clause. [#10537](https://github.com/ClickHouse/ClickHouse/pull/10537) ([expl0si0nn](https://github.com/expl0si0nn)). +* (Only Linux) Clickhouse server now tries to fall back to ProcfsMetricsProvider when clickhouse binary is not attributed with CAP_NET_ADMIN capability to collect per-query system metrics (for CPU and I/O). [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). 
+* - Add Arrow IPC File format (Input and Output) - Fix incorrect work of resetParser() for Parquet Input Format - Add zero-copy optimization for ORC for RandomAccessFiles - Add missing halffloat type for input parquet and ORC formats ... [#10580](https://github.com/ClickHouse/ClickHouse/pull/10580) ([Zhanna](https://github.com/FawnD2)). +* Allowed to profile memory with finer granularity steps than 4 MiB. Added sampling memory profiler to capture random allocations/deallocations. [#10598](https://github.com/ClickHouse/ClickHouse/pull/10598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add new input format `JSONAsString` that accepts a sequence of JSON objects separated by newlines, spaces and/or commas. [#10607](https://github.com/ClickHouse/ClickHouse/pull/10607) ([Kruglov Pavel](https://github.com/Avogar)). +* Added a new layout ```direct``` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). +* Default user and database creation on docker image starting. [#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). +* Add data type Point (Tuple(Float64, Float64)) and Polygon (Array(Array(Tuple(Float64, Float64)))). [#10678](https://github.com/ClickHouse/ClickHouse/pull/10678) ([Alexey Ilyukhov](https://github.com/livace)). +* New function toStartOfSecond(DateTime64) -> DateTime64 that nullifies sub-second part of DateTime64 value. [#10722](https://github.com/ClickHouse/ClickHouse/pull/10722) ([Vasily Nemkov](https://github.com/Enmk)). +* Added a function `randomString` that generates binary string with random bytes (including zero bytes). [#10733](https://github.com/ClickHouse/ClickHouse/pull/10733) ([Andrei Nekrashevich](https://github.com/axolm)). +* Added `system.licenses` table. 
This table contains licenses of third-party libraries that are located in `contrib` directory. This closes [#2890](https://github.com/ClickHouse/ClickHouse/issues/2890). [#10795](https://github.com/ClickHouse/ClickHouse/pull/10795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* OFFSET keyword can now be used without an affiliated LIMIT clause. [#10802](https://github.com/ClickHouse/ClickHouse/pull/10802) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Added new complex key direct layout to dictionaries, that does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). +* Added function `randomFixedString`. [#10866](https://github.com/ClickHouse/ClickHouse/pull/10866) ([Andrei Nekrashevich](https://github.com/axolm)). +* Support `ALTER RENAME COLUMN` for the distributed table engine. Continuation of [#10727](https://github.com/ClickHouse/ClickHouse/issues/10727). Fixes [#10747](https://github.com/ClickHouse/ClickHouse/issues/10747). [#10887](https://github.com/ClickHouse/ClickHouse/pull/10887) ([alesapin](https://github.com/alesapin)). +* Support writes in ODBC Table function [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow specifying `mongodb://` URI for MongoDB dictionaries. [#10915](https://github.com/ClickHouse/ClickHouse/pull/10915) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Added new functions to import/export DateTime64 as Int64 with various precision: `to-/fromUnixTimestamp64Milli/-Micro/-Nano`. [#10923](https://github.com/ClickHouse/ClickHouse/pull/10923) ([Vasily Nemkov](https://github.com/Enmk)). +* Now support NULL and NOT NULL modifiers for data types in create query. 
[#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). +* Add ArrowStream input and output format. [#11088](https://github.com/ClickHouse/ClickHouse/pull/11088) ([hcz](https://github.com/hczhcz)). +* Default S3 credentials and custom auth headers. [#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* Add query performance metrics based on Linux `perf_events`. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) ([Andrey Skobtsov](https://github.com/And42)). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Use HTTP client for S3 based on Poco. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add function `fuzzBits` that randomly flips bits in a string with given probability. [#11237](https://github.com/ClickHouse/ClickHouse/pull/11237) ([Andrei Nekrashevich](https://github.com/axolm)). +* Add `_timestamp_ms` virtual column for Kafka engine (type is `Nullable(DateTime64(3))`). [#11260](https://github.com/ClickHouse/ClickHouse/pull/11260) ([filimonov](https://github.com/filimonov)). +* Add 2 more virtual columns for engine=Kafka to access message headers. [#11283](https://github.com/ClickHouse/ClickHouse/pull/11283) ([filimonov](https://github.com/filimonov)). +* Add `netloc` function for extracting network location, similar to `urlparse(url)`, `netloc` in python. [#11356](https://github.com/ClickHouse/ClickHouse/pull/11356) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Added syntax highlighting to clickhouse-client using ReplXX. [#11422](https://github.com/ClickHouse/ClickHouse/pull/11422) ([Tagir Kuskarov](https://github.com/kuskarov)). +* Add SHOW CLUSTER(S) queries. [#11467](https://github.com/ClickHouse/ClickHouse/pull/11467) ([hexiaoting](https://github.com/hexiaoting)). 
+* Add functions `extractAllGroupsHorizontal(haystack, re)` and `extractAllGroupsVertical(haystack, re)`. [#11554](https://github.com/ClickHouse/ClickHouse/pull/11554) ([Vasily Nemkov](https://github.com/Enmk)). +* Add the `system.asynchronous_metric_log` table that logs historical metrics from `system.asynchronous_metrics`. [#11588](https://github.com/ClickHouse/ClickHouse/pull/11588) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* `minMap` and `maxMap` functions were added. [#11603](https://github.com/ClickHouse/ClickHouse/pull/11603) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added support for MySQL style global variables syntax (stub). This is needed for compatibility of MySQL protocol. [#11832](https://github.com/ClickHouse/ClickHouse/pull/11832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Performance Improvement +* Optimization of GROUP BY with respect to table sorting key. [#9113](https://github.com/ClickHouse/ClickHouse/pull/9113) ([Dmitry Rubashkin](https://github.com/dimarub2000)). +* New optimization that takes arithmetic operations out of aggregate functions. [#10047](https://github.com/ClickHouse/ClickHouse/pull/10047) ([Ruslan](https://github.com/kamalov-ruslan)). +* This feature eliminates functions of other keys in GROUP BY section. [#10051](https://github.com/ClickHouse/ClickHouse/pull/10051) ([Victor Grishanin](https://github.com/xPoSx)). +* Add runtime CPU detection to select and dispatch the best function implementation. Add support for codegeneration for multiple targets. This closes [#1017](https://github.com/ClickHouse/ClickHouse/issues/1017). [#10058](https://github.com/ClickHouse/ClickHouse/pull/10058) ([DimasKovas](https://github.com/DimasKovas)). +* Remove duplicate ORDER BY and DISTINCT from subqueries. [#10067](https://github.com/ClickHouse/ClickHouse/pull/10067) ([Mikhail Malafeev](https://github.com/demo-99)). +* Sort bigger parts of the left table in MergeJoin. Buffer left blocks in memory. 
Add `partial_merge_join_left_table_buffer_bytes` setting to manage the left blocks buffers sizes. [#10601](https://github.com/ClickHouse/ClickHouse/pull/10601) ([Artem Zuikov](https://github.com/4ertus2)). +* Get dictionary and check access rights only once per each call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improving radix sort by removing some redundant data moves. [#10981](https://github.com/ClickHouse/ClickHouse/pull/10981) ([Arslan Gumerov](https://github.com/g-arslan)). +* Make queries with `sum` aggregate function and without GROUP BY keys to run multiple times faster. [#10992](https://github.com/ClickHouse/ClickHouse/pull/10992) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable mlock of clickhouse binary by default. It will prevent clickhouse executable from being paged out under high IO load. [#11139](https://github.com/ClickHouse/ClickHouse/pull/11139) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improved performance for queries with `ORDER BY` and small `LIMIT` (less than `max_block_size`). [#11171](https://github.com/ClickHouse/ClickHouse/pull/11171) ([Provet](https://github.com/Provet)). +* Improve performance for INSERT queries via INSERT SELECT or INSERT with clickhouse-client when small blocks are generated (typical case with parallel parsing). This fixes [#11275](https://github.com/ClickHouse/ClickHouse/issues/11275). Fix the issue that CONSTRAINTs were not working for DEFAULT fields. This fixes [#11273](https://github.com/ClickHouse/ClickHouse/issues/11273). Fix the issue that CONSTRAINTS were ignored for TEMPORARY tables. This fixes [#11274](https://github.com/ClickHouse/ClickHouse/issues/11274). [#11276](https://github.com/ClickHouse/ClickHouse/pull/11276) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Improve performance of `clickhouse-client` in interactive mode when Pretty formats are used. In previous versions, significant amount of time can be spent calculating visible width of UTF-8 string. This closes [#11323](https://github.com/ClickHouse/ClickHouse/issues/11323). [#11323](https://github.com/ClickHouse/ClickHouse/pull/11323) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* New optimization that takes all operations out of "any" function. [#11529](https://github.com/ClickHouse/ClickHouse/pull/11529) ([Ruslan](https://github.com/kamalov-ruslan)). +* Speed up merging in AggregatingMergeTree. This fixes performance regression that was introduced more than a year ago in [#4348](https://github.com/ClickHouse/ClickHouse/issues/4348) (in version 19.3). [#11534](https://github.com/ClickHouse/ClickHouse/pull/11534) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow multiple replicas to assign merges, mutations, partition drop, move and replace concurrently. This closes [#10367](https://github.com/ClickHouse/ClickHouse/issues/10367). [#11639](https://github.com/ClickHouse/ClickHouse/pull/11639) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This optimization eliminates min/max/any aggregators of GROUP BY keys in SELECT section. [#11667](https://github.com/ClickHouse/ClickHouse/pull/11667) ([Victor Grishanin](https://github.com/xPoSx)). + +#### Improvement +* Allow to pass quota_key in clickhouse-client. This closes [#10227](https://github.com/ClickHouse/ClickHouse/issues/10227). [#10270](https://github.com/ClickHouse/ClickHouse/pull/10270) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added system tables for users, roles, grants, settings profiles, quotas, row policies; added commands SHOW USER, SHOW [CURRENT|ENABLED] ROLES, SHOW SETTINGS PROFILES. [#10387](https://github.com/ClickHouse/ClickHouse/pull/10387) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* - Adding support for `INSERT INTO [db.]table WATCH` query. [#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)). +* Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). +* Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. [#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Make pointInPolygon work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)). +* Print a message if clickhouse-client is newer than clickhouse-server. [#10627](https://github.com/ClickHouse/ClickHouse/pull/10627) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change HTTP response code in case of some parse errors to 400 Bad Request. This fix [#10636](https://github.com/ClickHouse/ClickHouse/issues/10636). [#10640](https://github.com/ClickHouse/ClickHouse/pull/10640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adding support for ALTER RENAME COLUMN query to Distributed table engine. [#10727](https://github.com/ClickHouse/ClickHouse/pull/10727) ([vzakaznikov](https://github.com/vzakaznikov)). +* Possibility to configure proxy-resolver for DiskS3. [#10744](https://github.com/ClickHouse/ClickHouse/pull/10744) ([Pavel Kovalenko](https://github.com/Jokser)). +* Better DNS exception message. 
This fixes [#10813](https://github.com/ClickHouse/ClickHouse/issues/10813). [#10828](https://github.com/ClickHouse/ClickHouse/pull/10828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ensure that `varSamp`, `varPop` cannot return negative results due to numerical errors and that `stddevSamp`, `stddevPop` cannot be calculated from negative variance. This fixes [#10532](https://github.com/ClickHouse/ClickHouse/issues/10532). [#10829](https://github.com/ClickHouse/ClickHouse/pull/10829) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Get rid of exception from replicated queue during server shutdown. Fixes [#10819](https://github.com/ClickHouse/ClickHouse/issues/10819). [#10841](https://github.com/ClickHouse/ClickHouse/pull/10841) ([alesapin](https://github.com/alesapin)). +* The `clickhouse-format` tool is now able to format multiple queries when the `-n` argument is used. [#10852](https://github.com/ClickHouse/ClickHouse/pull/10852) ([Darío](https://github.com/dgrr)). +* Provide synonyms for some data types. [#10856](https://github.com/ClickHouse/ClickHouse/pull/10856) ([Павел Потемкин](https://github.com/Potya)). +* Introduce `min_insert_block_size_rows_for_materialized_views`, `min_insert_block_size_bytes_for_materialized_views` settings. These settings are similar to `min_insert_block_size_rows` and `min_insert_block_size_bytes`, but applied only for blocks inserted into `MATERIALIZED VIEW`. It helps to control blocks squashing while pushing to MVs and avoid excessive memory usage. [#10858](https://github.com/ClickHouse/ClickHouse/pull/10858) ([Azat Khuzhin](https://github.com/azat)). +* Respect prefer_localhost_replica/load_balancing on INSERT into Distributed. [#10867](https://github.com/ClickHouse/ClickHouse/pull/10867) ([Azat Khuzhin](https://github.com/azat)). +* Allow large UInt types as the index in function `tupleElement`. [#10874](https://github.com/ClickHouse/ClickHouse/pull/10874) ([hcz](https://github.com/hczhcz)).
+* Support for unicode whitespaces in queries. This helps when queries are copy-pasted from Word or from web page. This fixes [#10896](https://github.com/ClickHouse/ClickHouse/issues/10896). [#10903](https://github.com/ClickHouse/ClickHouse/pull/10903) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some additions and cleanup for [#10232](https://github.com/ClickHouse/ClickHouse/issues/10232). [#10934](https://github.com/ClickHouse/ClickHouse/pull/10934) ([Artem Zuikov](https://github.com/4ertus2)). +* Set thread names for internal threads of rdkafka library. Make logs from rdkafka available in server logs. [#10983](https://github.com/ClickHouse/ClickHouse/pull/10983) ([Azat Khuzhin](https://github.com/azat)). +* Remove data on explicit `DROP DATABASE` for `Memory` database engine. Fixes [#10557](https://github.com/ClickHouse/ClickHouse/issues/10557). [#11021](https://github.com/ClickHouse/ClickHouse/pull/11021) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `NCHAR` and `NVARCHAR` synonyms for data types. [#11025](https://github.com/ClickHouse/ClickHouse/pull/11025) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Apply `TTL` for old data, after `ALTER MODIFY TTL` query. This behaviour is controlled by setting `materialize_ttl_after_modify`, which is enabled by default. [#11042](https://github.com/ClickHouse/ClickHouse/pull/11042) ([Anton Popov](https://github.com/CurtizJ)). +* Now `dictGet*` functions accept table names. [#11050](https://github.com/ClickHouse/ClickHouse/pull/11050) ([Vitaly Baranov](https://github.com/vitlibar)). +* Show authentication type in table system.users and while executing SHOW CREATE USER query. [#11080](https://github.com/ClickHouse/ClickHouse/pull/11080) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable percpu_arena:percpu for jemalloc (This will reduce memory fragmentation due to thread pool).
[#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)). +* Add port() function (to extract port from URL). [#11120](https://github.com/ClickHouse/ClickHouse/pull/11120) ([Azat Khuzhin](https://github.com/azat)). +* Resolved [#7224](https://github.com/ClickHouse/ClickHouse/issues/7224): added `FailedQuery`, `FailedSelectQuery` and `FailedInsertQuery` metrics to `system.events` table. [#11151](https://github.com/ClickHouse/ClickHouse/pull/11151) ([Nikita Orlov](https://github.com/naorlov)). +* The query log is now enabled by default. [#11184](https://github.com/ClickHouse/ClickHouse/pull/11184) ([Ivan Blinkov](https://github.com/blinkov)). +* When parsing C-style backslash escapes in string literals, VALUES and various text formats (this is an extension to SQL standard that is endemic for ClickHouse and MySQL), keep backslash if unknown escape sequence is found (e.g. `\%` or `\w`) that will make usage of `LIKE` and `match` regular expressions more convenient (it's enough to write `name LIKE 'used\_cars'` instead of `name LIKE 'used\\_cars'`) and more compatible at the same time. This fixes [#10922](https://github.com/ClickHouse/ClickHouse/issues/10922). [#11208](https://github.com/ClickHouse/ClickHouse/pull/11208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for multi-word data type names (such as `DOUBLE PRECISION` and `CHAR VARYING`) for better SQL compatibility. [#11214](https://github.com/ClickHouse/ClickHouse/pull/11214) ([Павел Потемкин](https://github.com/Potya)). +* Keep the value of `DistributedFilesToInsert` metric on exceptions. In previous versions, the value was set when we are going to send some files, but it is zero, if there was an exception and some files are still pending. Now it corresponds to the number of pending files in filesystem. [#11220](https://github.com/ClickHouse/ClickHouse/pull/11220) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Support kafka_client_id parameter for Kafka tables. It also changes the default `client.id` used by ClickHouse when communicating with Kafka to be more verbose and usable. [#11252](https://github.com/ClickHouse/ClickHouse/pull/11252) ([filimonov](https://github.com/filimonov)). +* Update librdkafka to version [1.4.2](https://github.com/edenhill/librdkafka/releases/tag/v1.4.2). [#11256](https://github.com/ClickHouse/ClickHouse/pull/11256) ([filimonov](https://github.com/filimonov)). +* Support (U)Int8, (U)Int16, Date in ASOF JOIN. [#11301](https://github.com/ClickHouse/ClickHouse/pull/11301) ([Artem Zuikov](https://github.com/4ertus2)). +* Better exception message in case when there is shortage of memory mappings. This closes [#11027](https://github.com/ClickHouse/ClickHouse/issues/11027). [#11316](https://github.com/ClickHouse/ClickHouse/pull/11316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add setting "output_format_pretty_max_value_width". If value is longer, it will be cut to avoid output of too large values in terminal. This closes [#11140](https://github.com/ClickHouse/ClickHouse/issues/11140). [#11324](https://github.com/ClickHouse/ClickHouse/pull/11324) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trailing whitespaces from formatted queries in `clickhouse-client` or `clickhouse-format` in some cases. [#11325](https://github.com/ClickHouse/ClickHouse/pull/11325) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better log messages while reloading configuration. [#11341](https://github.com/ClickHouse/ClickHouse/pull/11341) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Suppress output of cancelled queries in clickhouse-client. In previous versions result may continue to print in terminal even after you press Ctrl+C to cancel query. This closes [#9473](https://github.com/ClickHouse/ClickHouse/issues/9473).
[#11342](https://github.com/ClickHouse/ClickHouse/pull/11342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* OPTIMIZE FINAL will force merge even if concurrent merges are performed. This closes [#11309](https://github.com/ClickHouse/ClickHouse/issues/11309) and closes [#11322](https://github.com/ClickHouse/ClickHouse/issues/11322). [#11346](https://github.com/ClickHouse/ClickHouse/pull/11346) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for all format settings in Kafka, expose some setting on table level, adjust the defaults for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). +* Add system.distribution_queue table. [#11394](https://github.com/ClickHouse/ClickHouse/pull/11394) ([Azat Khuzhin](https://github.com/azat)). +* ON CLUSTER support for SYSTEM {FLUSH DISTRIBUTED,STOP/START DISTRIBUTED SEND}. [#11415](https://github.com/ClickHouse/ClickHouse/pull/11415) ([Azat Khuzhin](https://github.com/azat)). +* Now history file is updated after each query and there is no race condition if multiple clients use one history file. This fixes [#9897](https://github.com/ClickHouse/ClickHouse/issues/9897). [#11453](https://github.com/ClickHouse/ClickHouse/pull/11453) ([Tagir Kuskarov](https://github.com/kuskarov)). +* Automatically update DNS cache, which is used to check if user is allowed to connect from an address. [#11487](https://github.com/ClickHouse/ClickHouse/pull/11487) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Adding support for PREWHERE in live view tables. [#11495](https://github.com/ClickHouse/ClickHouse/pull/11495) ([vzakaznikov](https://github.com/vzakaznikov)). +* Improve `enable_optimize_predicate_expression=1` logic for VIEW. [#11513](https://github.com/ClickHouse/ClickHouse/pull/11513) ([Artem Zuikov](https://github.com/4ertus2)). +* Better exception message when cannot parse columns declaration list. 
This closes [#10403](https://github.com/ClickHouse/ClickHouse/issues/10403). [#11537](https://github.com/ClickHouse/ClickHouse/pull/11537) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimize memory usage when reading a response from an S3 HTTP client. [#11561](https://github.com/ClickHouse/ClickHouse/pull/11561) ([Pavel Kovalenko](https://github.com/Jokser)). +* Improve `multiple_joins_rewriter_version=2` logic. Fix unknown columns error for lambda aliases. [#11587](https://github.com/ClickHouse/ClickHouse/pull/11587) ([Artem Zuikov](https://github.com/4ertus2)). +* Make more input format work with Kafka engine. Fix the issue with premature flushes. Fix the performance issue when `kafka_num_consumers` is greater than number of partitions in topic. [#11599](https://github.com/ClickHouse/ClickHouse/pull/11599) ([filimonov](https://github.com/filimonov)). +* https://github.com/ClickHouse/ClickHouse/pull/7572#issuecomment-642815377 Support config default HTTPHandlers. [#11628](https://github.com/ClickHouse/ClickHouse/pull/11628) ([Winter Zhang](https://github.com/zhang2014)). +* Add round_robin load_balancing. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). +* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow comparison with constant strings by implicit conversions when analysing index conditions on other types. This may close [#11630](https://github.com/ClickHouse/ClickHouse/issues/11630). [#11648](https://github.com/ClickHouse/ClickHouse/pull/11648) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow using `groupArrayArray` and `groupUniqArrayArray` as `SimpleAggregateFunction`. [#11650](https://github.com/ClickHouse/ClickHouse/pull/11650) ([Volodymyr Kuznetsov](https://github.com/ksvladimir)). 
+* Skip empty parameters in requested URL. They may appear when you write `http://localhost:8123/?&a=b` or `http://localhost:8123/?a=b&&c=d`. This closes [#10749](https://github.com/ClickHouse/ClickHouse/issues/10749). [#11651](https://github.com/ClickHouse/ClickHouse/pull/11651) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to DROP replicated table if the metadata in ZooKeeper was already removed and does not exist (this is also the case when using TestKeeper for testing and the server was restarted). Allow to RENAME replicated table even if there is an error communicating with ZooKeeper. This fixes [#10720](https://github.com/ClickHouse/ClickHouse/issues/10720). [#11652](https://github.com/ClickHouse/ClickHouse/pull/11652) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Return NULL/zero when value is not parsed completely in parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added column `position` to `system.columns` table and `column_position` to `system.parts_columns` table. It contains ordinal position of a column in a table starting with 1. This closes [#7744](https://github.com/ClickHouse/ClickHouse/issues/7744). [#11655](https://github.com/ClickHouse/ClickHouse/pull/11655) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't use debug info from ELF file if it doesn't correspond to the running binary. It is needed to avoid printing wrong function names and source locations in stack traces. This fixes [#7514](https://github.com/ClickHouse/ClickHouse/issues/7514). [#11657](https://github.com/ClickHouse/ClickHouse/pull/11657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Clear password from command line in `clickhouse-client` and `clickhouse-benchmark` if the user has specified it with explicit value. 
This prevents password exposure by `ps` and similar tools. [#11665](https://github.com/ClickHouse/ClickHouse/pull/11665) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Multiple names are now allowed in commands: CREATE USER, CREATE ROLE, ALTER USER, SHOW CREATE USER, SHOW GRANTS and so on. [#11670](https://github.com/ClickHouse/ClickHouse/pull/11670) ([Vitaly Baranov](https://github.com/vitlibar)). +* When multiline query is printed to server log, the lines are joined. Make it work correctly in case of multiline string literals, identifiers and single-line comments. This fixes [#3853](https://github.com/ClickHouse/ClickHouse/issues/3853). [#11686](https://github.com/ClickHouse/ClickHouse/pull/11686) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Emit warning instead of error in server log at startup if we cannot listen one of the listen addresses (e.g. IPv6 is unavailable inside Docker). Note that if server fails to listen all listed addresses, it will refuse to startup as before. This fixes [#4406](https://github.com/ClickHouse/ClickHouse/issues/4406). [#11687](https://github.com/ClickHouse/ClickHouse/pull/11687) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added support for distributed `DDL` (update/delete/drop partition) on cross replication clusters. [#11703](https://github.com/ClickHouse/ClickHouse/pull/11703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add `cast_keep_nullable` setting. If set `CAST(something_nullable AS Type)` return `Nullable(Type)`. [#11733](https://github.com/ClickHouse/ClickHouse/pull/11733) ([Artem Zuikov](https://github.com/4ertus2)). +* Add more `jemalloc` statistics to `system.asynchronous_metrics`, and ensure that we see up-to-date values for them. [#11748](https://github.com/ClickHouse/ClickHouse/pull/11748) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add ability to set `MATERIALIZED` default type for primary key columns and columns with secondary indices.
[#11786](https://github.com/ClickHouse/ClickHouse/pull/11786) ([alesapin](https://github.com/alesapin)). +* Remove leader election, step 3: remove yielding of leadership; remove sending queries to leader. [#11795](https://github.com/ClickHouse/ClickHouse/pull/11795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added `hostname` as an alias to function `hostName`. This feature was suggested by Victor Tarnavskiy from Yandex.Metrica. [#11821](https://github.com/ClickHouse/ClickHouse/pull/11821) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix sleep invocation in signal handler. It was sleeping for less amount of time than expected. [#11825](https://github.com/ClickHouse/ClickHouse/pull/11825) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly improve diagnostic of reading decimal from string. This closes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11829](https://github.com/ClickHouse/ClickHouse/pull/11829) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* When reading Decimal value, cut extra digits after point. This behaviour is more compatible with MySQL and PostgreSQL. This fixes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11831](https://github.com/ClickHouse/ClickHouse/pull/11831) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR dist send via INSERT had been postponing on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fails on `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)). +* Disable GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` had been introduced and turned off by default, due to trickery of sharding_key analyzing, simple example is `if` in sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)). +* Fix index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)). +* Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix SELECT of column ALIAS whose default expression type is different from column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)). +* This PR fixes possible crash when `createDictionary()` is called before `loadStoredObject()` has finished.
[#10587](https://github.com/ClickHouse/ClickHouse/pull/10587) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed handling condition variable for synchronous mutations. In some cases signals to that condition variable could be lost. [#10588](https://github.com/ClickHouse/ClickHouse/pull/10588) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fixed incorrect scalar results inside inner query of `MATERIALIZED VIEW` in case if this query contained dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* On `SYSTEM DROP DNS CACHE` query also drop caches, which are used to check if user is allowed to connect from some IP addresses. [#10608](https://github.com/ClickHouse/ClickHouse/pull/10608) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix segfault in StorageBuffer when exception on server startup. Fixes [#10550](https://github.com/ClickHouse/ClickHouse/issues/10550). [#10609](https://github.com/ClickHouse/ClickHouse/pull/10609) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimize_skip_unused_shards with LowCardinality. [#10611](https://github.com/ClickHouse/ClickHouse/pull/10611) ([Azat Khuzhin](https://github.com/azat)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Fix nullptr dereference in StorageBuffer if server was shutdown before table startup. 
[#10641](https://github.com/ClickHouse/ClickHouse/pull/10641) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bug which locks concurrent alters when table has a lot of parts. [#10659](https://github.com/ClickHouse/ClickHouse/pull/10659) ([alesapin](https://github.com/alesapin)). +* Fix possible incorrect number of rows for queries with `LIMIT`. Fixes [#10566](https://github.com/ClickHouse/ClickHouse/issues/10566), [#10709](https://github.com/ClickHouse/ClickHouse/issues/10709). [#10660](https://github.com/ClickHouse/ClickHouse/pull/10660) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the lack of parallel execution of remote queries with `distributed_aggregation_memory_efficient` enabled. Fixes [#10655](https://github.com/ClickHouse/ClickHouse/issues/10655). [#10664](https://github.com/ClickHouse/ClickHouse/pull/10664) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix order of parameters in AggregateTransform constructor. [#10667](https://github.com/ClickHouse/ClickHouse/pull/10667) ([palasonic1](https://github.com/palasonic1)). +* Fixed bug, which causes http requests stuck on client close when `readonly=2` and `cancel_http_readonly_queries_on_client_close=1`. Fixes [#7939](https://github.com/ClickHouse/ClickHouse/issues/7939), [#7019](https://github.com/ClickHouse/ClickHouse/issues/7019), [#7736](https://github.com/ClickHouse/ClickHouse/issues/7736), [#7091](https://github.com/ClickHouse/ClickHouse/issues/7091). [#10684](https://github.com/ClickHouse/ClickHouse/pull/10684) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix multiple usages of `IN` operator with the identical set in one query. [#10686](https://github.com/ClickHouse/ClickHouse/pull/10686) ([Anton Popov](https://github.com/CurtizJ)). +* Fix atomicity of HTTP insert. This fixes [#9666](https://github.com/ClickHouse/ClickHouse/issues/9666). 
[#10687](https://github.com/ClickHouse/ClickHouse/pull/10687) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix disappearing totals. Totals could have been filtered if query had had join or subquery with external where condition. Fixes [#10674](https://github.com/ClickHouse/ClickHouse/issues/10674). [#10698](https://github.com/ClickHouse/ClickHouse/pull/10698) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible buffer overflow in function `h3EdgeAngle`. [#10711](https://github.com/ClickHouse/ClickHouse/pull/10711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). +* Fix data corruption for `LowCardinality(FixedString)` key column in `SummingMergeTree` which could have happened after merge. Fixes [#10489](https://github.com/ClickHouse/ClickHouse/issues/10489). [#10721](https://github.com/ClickHouse/ClickHouse/pull/10721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash in `generateRandom` with nested types. Fixes [#10583](https://github.com/ClickHouse/ClickHouse/issues/10583). [#10734](https://github.com/ClickHouse/ClickHouse/pull/10734) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix combinator -OrNull and -OrDefault when combined with -State. [#10741](https://github.com/ClickHouse/ClickHouse/pull/10741) ([hcz](https://github.com/hczhcz)). +* Fix `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix avgWeighted when using floating-point weight over multiple shards.
[#10758](https://github.com/ClickHouse/ClickHouse/pull/10758) ([Baudouin Giard](https://github.com/bgiard)). +* Get rid of old libunwind patches. https://github.com/ClickHouse-Extras/libunwind/commit/500aa227911bd185a94bfc071d68f4d3b03cb3b1#r39048012 This allows to disable `-fno-omit-frame-pointer` in `clang` builds that improves performance at least by 1% in average. [#10761](https://github.com/ClickHouse/ClickHouse/pull/10761) ([Amos Bird](https://github.com/amosbird)). +* Make use of `src_type` for correct type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix UBSan and MSan report in DateLUT. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the issue with ODBC bridge when no quoting of identifiers is requested. This fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix columns order after Block::sortColumns() (also add a test that shows that it affects some real use case - Buffer engine). [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)). +* Fix potential read of uninitialized memory in cache dictionary. [#10834](https://github.com/ClickHouse/ClickHouse/pull/10834) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now constraints are updated if the column participating in `CONSTRAINT` expression was renamed. Fixes [#10844](https://github.com/ClickHouse/ClickHouse/issues/10844). [#10847](https://github.com/ClickHouse/ClickHouse/pull/10847) ([alesapin](https://github.com/alesapin)). 
+* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` on `OPTIMIZE` query to hang waiting for some replica after it became inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed `WATCH` hangs after `LiveView` table was dropped from database with `Atomic` engine. [#10859](https://github.com/ClickHouse/ClickHouse/pull/10859) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix SIGSEGV in StringHashTable (if such key does not exist). [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)). +* Fix backward compatibility with tuples in Distributed tables. [#10889](https://github.com/ClickHouse/ClickHouse/pull/10889) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible race which could happen when you get result from aggregate function state from multiple threads for the same column. The only way (which I found) it can happen is when you use `finalizeAggregation` function while reading from table with `Memory` engine which stores `AggregateFunction` state for `quantile*` function. [#10890](https://github.com/ClickHouse/ClickHouse/pull/10890) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now it's possible to execute multiple `ALTER RENAME` like `a TO b, c TO a`. [#10895](https://github.com/ClickHouse/ClickHouse/pull/10895) ([alesapin](https://github.com/alesapin)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#10910](https://github.com/ClickHouse/ClickHouse/pull/10910) ([filimonov](https://github.com/filimonov)). +* Fix crash in `SELECT count(notNullIn(NULL, []))`. [#10920](https://github.com/ClickHouse/ClickHouse/pull/10920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid sending partially written files by the DistributedBlockOutputStream.
[#10940](https://github.com/ClickHouse/ClickHouse/pull/10940) ([Azat Khuzhin](https://github.com/azat)). +* Fix incompatibility of two-level aggregation between versions 20.1 and earlier. This incompatibility happens when different versions of ClickHouse are used on initiator node and remote nodes and the size of GROUP BY result is large and aggregation is performed by a single String field. It leads to several unmerged rows for a single key in result. [#10952](https://github.com/ClickHouse/ClickHouse/pull/10952) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect raw data size in method getRawData(). [#10964](https://github.com/ClickHouse/ClickHouse/pull/10964) ([Igr](https://github.com/ObjatieGroba)). +* Fix server crash on concurrent `ALTER` and `DROP DATABASE` queries with `Atomic` database engine. [#10968](https://github.com/ClickHouse/ClickHouse/pull/10968) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix metadata (relative path for rename) and data (relative path for symlink) handling for Atomic database. [#10980](https://github.com/ClickHouse/ClickHouse/pull/10980) ([Azat Khuzhin](https://github.com/azat)). +* Fix very rare potential use-after-free error in MergeTree if table was not created successfully. [#10986](https://github.com/ClickHouse/ClickHouse/pull/10986) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix memory tracking for two-level GROUP BY when not all rows read from Aggregator (TCP). [#11022](https://github.com/ClickHouse/ClickHouse/pull/11022) ([Azat Khuzhin](https://github.com/azat)). +* Fixed parsing of S3 URLs. [#11036](https://github.com/ClickHouse/ClickHouse/pull/11036) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Now it's possible to `ADD/DROP` and `RENAME` the same one column in a single `ALTER` query. Exception message for simultaneous `MODIFY` and `RENAME` became more clear. Partially fixes [#10669](https://github.com/ClickHouse/ClickHouse/issues/10669). 
[#11037](https://github.com/ClickHouse/ClickHouse/pull/11037) ([alesapin](https://github.com/alesapin)). +* Fixed parseDateTime64BestEffort argument resolution bugs. [#10925](https://github.com/ClickHouse/ClickHouse/issues/10925). [#11038](https://github.com/ClickHouse/ClickHouse/pull/11038) ([Vasily Nemkov](https://github.com/Enmk)). +* Fixes the potential missed data during termination of Kafka engine table. [#11048](https://github.com/ClickHouse/ClickHouse/pull/11048) ([filimonov](https://github.com/filimonov)). +* Fix error `No such name in Block::erase()` when JOIN appears with PREWHERE or `optimize_move_to_prewhere` makes PREWHERE from WHERE. [#11051](https://github.com/ClickHouse/ClickHouse/pull/11051) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed memory leak in registerDiskS3. [#11074](https://github.com/ClickHouse/ClickHouse/pull/11074) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fixed deadlock during server startup after update with changes in structure of system log tables. [#11106](https://github.com/ClickHouse/ClickHouse/pull/11106) ([alesapin](https://github.com/alesapin)). +* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#11145](https://github.com/ClickHouse/ClickHouse/pull/11145) ([filimonov](https://github.com/filimonov)). +* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). 
+* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now `primary.idx` will be checked if it's defined in `CREATE` query. [#11199](https://github.com/ClickHouse/ClickHouse/pull/11199) ([alesapin](https://github.com/alesapin)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash while reading malformed data in Protobuf format. 
This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix wrong markup in documentation. [#11263](https://github.com/ClickHouse/ClickHouse/pull/11263) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. 
[#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix insignificant data race in clickhouse-copier. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash in direct selects from StorageJoin (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix potential segfault when using `Lazy` database. [#11348](https://github.com/ClickHouse/ClickHouse/pull/11348) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Better errors for `joinGet()` functions. [#11389](https://github.com/ClickHouse/ClickHouse/pull/11389) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in JOIN over LowCardinality(T) and Nullable(T). [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix return compressed size for codecs.
[#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong result in queries like `select count() from t, u`. [#11454](https://github.com/ClickHouse/ClickHouse/pull/11454) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix `Pipeline stuck` exception for `INSERT SELECT FINAL` where `SELECT` (`max_threads`>1) has multiple streams but `INSERT` has only one (`max_insert_threads`==0). [#11455](https://github.com/ClickHouse/ClickHouse/pull/11455) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix race condition which may lead to an exception during table drop. It's a bit tricky and not dangerous at all. If you want an explanation, just notice me in telegram. [#11523](https://github.com/ClickHouse/ClickHouse/pull/11523) ([alesapin](https://github.com/alesapin)). +* Fix async INSERT into Distributed for prefer_localhost_replica=0 and w/o internal_replication. [#11527](https://github.com/ClickHouse/ClickHouse/pull/11527) ([Azat Khuzhin](https://github.com/azat)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now clickhouse-server docker container will prefer IPv6 checking server aliveness. 
[#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* All queries in HTTP session have had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed rare segfault in `SHOW CREATE TABLE` Fixes [#11490](https://github.com/ClickHouse/ClickHouse/issues/11490). [#11579](https://github.com/ClickHouse/ClickHouse/pull/11579) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix race conditions in CREATE/DROP of different replicas of ReplicatedMergeTree. Continue to work if the table was not removed completely from ZooKeeper or not created successfully. This fixes [#11432](https://github.com/ClickHouse/ClickHouse/issues/11432). [#11592](https://github.com/ClickHouse/ClickHouse/pull/11592) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* without -q option the database does not get created at startup. 
[#11604](https://github.com/ClickHouse/ClickHouse/pull/11604) ([giordyb](https://github.com/giordyb)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Remove trivial count query optimization if row-level security is set. In previous versions the user get total count of records in a table instead filtered. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix syntax hilite in CREATE USER query. [#11664](https://github.com/ClickHouse/ClickHouse/pull/11664) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error which leads to an incorrect state of `system.mutations`. It may show that whole mutation is already done but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)). +* Fix possible `Pipeline stuck` for selects with parallel `FINAL`. Fixes [#11636](https://github.com/ClickHouse/ClickHouse/issues/11636). 
[#11682](https://github.com/ClickHouse/ClickHouse/pull/11682) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `LIMIT n WITH TIES` usage together with `ORDER BY` statement, which contains aliases. [#11689](https://github.com/ClickHouse/ClickHouse/pull/11689) ([Anton Popov](https://github.com/CurtizJ)). +* Pass proper timeouts when communicating with XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix very rare race condition in SYSTEM SYNC REPLICA. If the replicated table is created and at the same time from the separate connection another client is issuing `SYSTEM SYNC REPLICA` command on that table (this is unlikely, because another client should be aware that the table is created), it's possible to get nullptr dereference. [#11691](https://github.com/ClickHouse/ClickHouse/pull/11691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Parse metadata stored in zookeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)). +* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). +* Fixes crash in special generated queries when `optimize_arithmetic_operations_in_aggregate_functions = 1`. [#11756](https://github.com/ClickHouse/ClickHouse/pull/11756) ([Ruslan](https://github.com/kamalov-ruslan)). 
+* Fixed `Scalar doesn't exist` exception when using `WITH ...` in `SELECT ... FROM merge_tree_table ...` [#11621](https://github.com/ClickHouse/ClickHouse/issues/11621). [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)). +* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Preserve column alias with optimize_aggregators_of_group_by_keys (`optimize_aggregators_of_group_by_keys` has been introduced in [#11667](https://github.com/ClickHouse/ClickHouse/issues/11667)). [#11806](https://github.com/ClickHouse/ClickHouse/pull/11806) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Add new build for query tests using pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)). +* Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)). +* Fix UBSan report in Decimal parse. This fixes [#7540](https://github.com/ClickHouse/ClickHouse/issues/7540). [#10512](https://github.com/ClickHouse/ClickHouse/pull/10512) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adding fuzzers and preparing for oss-fuzz integration. 
[#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)). +* Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)). +* Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)). +* Lower memory usage in tests. It may fix the issue that "address sanitizer is out of memory" in stress test. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `capnproto` version check for `capnp::UnalignedFlatArrayMessageReader`. [#10618](https://github.com/ClickHouse/ClickHouse/pull/10618) ([Matwey V. Kornilov](https://github.com/matwey)). +* Added auto-generated machine-readable file with list of stable versions. [#10628](https://github.com/ClickHouse/ClickHouse/pull/10628) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update LZ4 to the latest dev branch. It may fix the error under UBSan. [#10630](https://github.com/ClickHouse/ClickHouse/pull/10630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in LZ4 library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to use lld to link blobs (resources). [#10632](https://github.com/ClickHouse/ClickHouse/pull/10632) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove external call to `ld` (bfd) linker during tzdata processing in compile time. 
[#10634](https://github.com/ClickHouse/ClickHouse/pull/10634) ([alesapin](https://github.com/alesapin)). +* Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10. [#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix OOM in ASan stress test. [#10646](https://github.com/ClickHouse/ClickHouse/pull/10646) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixing and re-enabling 00979_live_view_watch_continuous_aggregates.py test. [#10658](https://github.com/ClickHouse/ClickHouse/pull/10658) ([vzakaznikov](https://github.com/vzakaznikov)). +* Update zstd to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Volumes and storages refactoring. [#10666](https://github.com/ClickHouse/ClickHouse/pull/10666) ([Gleb Novikov](https://github.com/NanoBjorn)). +* Trying to fix tests/queries/0_stateless/01246_insert_into_watch_live_view.py test. [#10670](https://github.com/ClickHouse/ClickHouse/pull/10670) ([vzakaznikov](https://github.com/vzakaznikov)). +* Update instruction to install RPM packages. This was suggested by Denis (TG login @ldviolet) and implemented by Arkady Shejn. [#10707](https://github.com/ClickHouse/ClickHouse/pull/10707) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update cross-builds to use clang-10 compiler. [#10724](https://github.com/ClickHouse/ClickHouse/pull/10724) ([Ivan](https://github.com/abyss7)). +* Fix performance test errors. 
[#10766](https://github.com/ClickHouse/ClickHouse/pull/10766) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix performance tests errors, part 2. [#10773](https://github.com/ClickHouse/ClickHouse/pull/10773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Restore a patch that was accidentally deleted in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10774](https://github.com/ClickHouse/ClickHouse/pull/10774) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Suppressions of warnings from libraries were mistakenly declared as public in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10776](https://github.com/ClickHouse/ClickHouse/pull/10776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable extra warnings for base, utils, programs. [#10779](https://github.com/ClickHouse/ClickHouse/pull/10779) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* GRPC make couldn't find protobuf files, changed make file by adding the right link. [#10794](https://github.com/ClickHouse/ClickHouse/pull/10794) ([mnkonkova](https://github.com/mnkonkova)). +* Add MSan suppression for MariaDB Client library. [#10800](https://github.com/ClickHouse/ClickHouse/pull/10800) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix (false) MSan report in MergeTreeIndexFullText. The issue first appeared in [#9968](https://github.com/ClickHouse/ClickHouse/issues/9968). [#10801](https://github.com/ClickHouse/ClickHouse/pull/10801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix obvious race condition in "Split build smoke test" check. [#10820](https://github.com/ClickHouse/ClickHouse/pull/10820) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better cooperation with sanitizers. Print information about query_id in the message of sanitizer failure.
[#10832](https://github.com/ClickHouse/ClickHouse/pull/10832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added more asserts in columns code. [#10833](https://github.com/ClickHouse/ClickHouse/pull/10833) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prepare to add MSan and UBSan stress tests. [#10871](https://github.com/ClickHouse/ClickHouse/pull/10871) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Database is recreated for every test. This improves separation of tests. [#10902](https://github.com/ClickHouse/ClickHouse/pull/10902) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added a test for empty external data. [#10926](https://github.com/ClickHouse/ClickHouse/pull/10926) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Return tzdata to build images and as dependency to .deb package. [#10929](https://github.com/ClickHouse/ClickHouse/pull/10929) ([alesapin](https://github.com/alesapin)). +* Fix non-deterministic test. [#10989](https://github.com/ClickHouse/ClickHouse/pull/10989) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Wait for odbc-bridge with exponential backoff. Previous wait time of 200 ms was not enough in our CI environment. [#10990](https://github.com/ClickHouse/ClickHouse/pull/10990) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable clang-tidy for programs and utils. [#10991](https://github.com/ClickHouse/ClickHouse/pull/10991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to run zookeeper in integration tests over tmpfs. [#11002](https://github.com/ClickHouse/ClickHouse/pull/11002) ([alesapin](https://github.com/alesapin)). +* Fixing 00979_live_view_watch_continuous_aggregates test. [#11024](https://github.com/ClickHouse/ClickHouse/pull/11024) ([vzakaznikov](https://github.com/vzakaznikov)). +* Make `system_tables_lazy_load` false by default. 
[#11029](https://github.com/ClickHouse/ClickHouse/pull/11029) ([Azat Khuzhin](https://github.com/azat)). +* Add performance test for non-constant polygons. [#11141](https://github.com/ClickHouse/ClickHouse/pull/11141) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Create root S3 bucket for tests before any CH instance is started. [#11142](https://github.com/ClickHouse/ClickHouse/pull/11142) ([Pavel Kovalenko](https://github.com/Jokser)). +* Enable performance test that was not working. [#11158](https://github.com/ClickHouse/ClickHouse/pull/11158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve build scripts for protobuf & gRPC. [#11172](https://github.com/ClickHouse/ClickHouse/pull/11172) ([Vitaly Baranov](https://github.com/vitlibar)). +* Split /programs/server into actual program and library. [#11186](https://github.com/ClickHouse/ClickHouse/pull/11186) ([Ivan](https://github.com/abyss7)). +* Now parts of linker command for `cctz` library will not be shuffled with other libraries. [#11213](https://github.com/ClickHouse/ClickHouse/pull/11213) ([alesapin](https://github.com/alesapin)). +* Fix several non-significant errors in unit tests. [#11262](https://github.com/ClickHouse/ClickHouse/pull/11262) ([alesapin](https://github.com/alesapin)). +* Add a test for Join table engine from @donmikel. This closes [#9158](https://github.com/ClickHouse/ClickHouse/issues/9158). [#11265](https://github.com/ClickHouse/ClickHouse/pull/11265) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Repeat test in CI if `curl` invocation was timed out. It is possible due to system hangups for 10+ seconds that are typical in our CI infrastructure. This fixes [#11267](https://github.com/ClickHouse/ClickHouse/issues/11267). [#11268](https://github.com/ClickHouse/ClickHouse/pull/11268) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potentially flaky test `00731_long_merge_tree_select_opened_files.sh`.
It does not fail frequently but we have discovered a potential race condition in this test while experimenting with ThreadFuzzer: [#9814](https://github.com/ClickHouse/ClickHouse/issues/9814). See [link](https://clickhouse-test-reports.s3.yandex.net/9814/40e3023e215df22985d275bf85f4d2290897b76b/functional_stateless_tests_(unbundled).html#fail1) for the example. [#11270](https://github.com/ClickHouse/ClickHouse/pull/11270) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now clickhouse-test checks the server aliveness before tests run. [#11285](https://github.com/ClickHouse/ClickHouse/pull/11285) ([alesapin](https://github.com/alesapin)). +* Emit a warning if server was built in debug or with sanitizers. [#11304](https://github.com/ClickHouse/ClickHouse/pull/11304) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better check for hung queries in clickhouse-test. [#11321](https://github.com/ClickHouse/ClickHouse/pull/11321) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove redundant timeout from integration test `test_insertion_sync_fails_with_timeout`. [#11343](https://github.com/ClickHouse/ClickHouse/pull/11343) ([alesapin](https://github.com/alesapin)). +* Add support for unit tests run with UBSan. [#11345](https://github.com/ClickHouse/ClickHouse/pull/11345) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). +* Refactor CMake build files. [#11390](https://github.com/ClickHouse/ClickHouse/pull/11390) ([Ivan](https://github.com/abyss7)). +* Leave only unit_tests_dbms in deb build. [#11429](https://github.com/ClickHouse/ClickHouse/pull/11429) ([Ilya Yatsishin](https://github.com/qoega)). +* Increase ccache size for builds in CI. [#11450](https://github.com/ClickHouse/ClickHouse/pull/11450) ([alesapin](https://github.com/alesapin)).
+* Speed up build by removing old example programs. Also found some orphan functional test. [#11486](https://github.com/ClickHouse/ClickHouse/pull/11486) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix missed include for `std::move` used at line 17. [#11525](https://github.com/ClickHouse/ClickHouse/pull/11525) ([Matwey V. Kornilov](https://github.com/matwey)). +* Added a random sampling of instances where copier is executed. It is needed to avoid `Too many simultaneous queries` error. Also increased timeout and decreased fault probability. [#11573](https://github.com/ClickHouse/ClickHouse/pull/11573) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Removes unused imports from HTTPHandlerFactory. [#11660](https://github.com/ClickHouse/ClickHouse/pull/11660) ([Bharat Nallan](https://github.com/bharatnc)). +* Don't allow tests with "fail" substring in their names because it makes looking at the tests results in browser less convenient when you type Ctrl+F and search for "fail". [#11817](https://github.com/ClickHouse/ClickHouse/pull/11817) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added a test to ensure that mutations continue to work after FREEZE query. [#11820](https://github.com/ClickHouse/ClickHouse/pull/11820) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print compiler build id in crash messages. It will make us slightly more certain about what binary has crashed. Added new function `buildId`. [#11824](https://github.com/ClickHouse/ClickHouse/pull/11824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove dependency on `tzdata`: do not fail if `/usr/share/zoneinfo` directory does not exist. Note that all timezones work in ClickHouse even without tzdata installed in system. [#11827](https://github.com/ClickHouse/ClickHouse/pull/11827) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NO CL CATEGORY + +* * Not for changelog. 
[#10985](https://github.com/ClickHouse/ClickHouse/pull/10985) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump numpy from 1.18.3 to 1.18.4 in /docs/tools'. [#10648](https://github.com/ClickHouse/ClickHouse/pull/10648) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: '[ImgBot] Optimize images'. [#10796](https://github.com/ClickHouse/ClickHouse/pull/10796) ([imgbot[bot]](https://github.com/apps/imgbot)). +* NO CL ENTRY: 'Bump mkdocs from 1.1 to 1.1.1 in /docs/tools'. [#10877](https://github.com/ClickHouse/ClickHouse/pull/10877) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.6 to 0.4.7 in /docs/tools'. [#10878](https://github.com/ClickHouse/ClickHouse/pull/10878) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump mkdocs from 1.1.1 to 1.1.2 in /docs/tools'. [#10938](https://github.com/ClickHouse/ClickHouse/pull/10938) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump protobuf from 3.11.3 to 3.12.0 in /docs/tools'. [#10995](https://github.com/ClickHouse/ClickHouse/pull/10995) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump beautifulsoup4 from 4.9.0 to 4.9.1 in /docs/tools'. [#10996](https://github.com/ClickHouse/ClickHouse/pull/10996) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump soupsieve from 2.0 to 2.0.1 in /docs/tools'. [#10997](https://github.com/ClickHouse/ClickHouse/pull/10997) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump mkdocs-macros-plugin from 0.4.7 to 0.4.9 in /docs/tools'. [#11064](https://github.com/ClickHouse/ClickHouse/pull/11064) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). 
+* NO CL ENTRY: 'Bump protobuf from 3.12.0 to 3.12.1 in /docs/tools'. [#11093](https://github.com/ClickHouse/ClickHouse/pull/11093) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump six from 1.14.0 to 1.15.0 in /docs/tools'. [#11129](https://github.com/ClickHouse/ClickHouse/pull/11129) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump protobuf from 3.12.1 to 3.12.2 in /docs/tools'. [#11241](https://github.com/ClickHouse/ClickHouse/pull/11241) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump numpy from 1.18.4 to 1.18.5 in /docs/tools'. [#11427](https://github.com/ClickHouse/ClickHouse/pull/11427) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump livereload from 2.6.1 to 2.6.2 in /docs/tools'. [#11502](https://github.com/ClickHouse/ClickHouse/pull/11502) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump certifi from 2020.4.5.1 to 2020.4.5.2 in /docs/tools'. [#11503](https://github.com/ClickHouse/ClickHouse/pull/11503) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump googletrans from 2.4.0 to 3.0.0 in /docs/tools'. [#11675](https://github.com/ClickHouse/ClickHouse/pull/11675) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump requests from 2.23.0 to 2.24.0 in /docs/tools'. [#11750](https://github.com/ClickHouse/ClickHouse/pull/11750) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + +#### New Feature: function randomStringUTF8 + +* Added function randomStringUTF8. [#10972](https://github.com/ClickHouse/ClickHouse/pull/10972) ([Andrei Nekrashevich](https://github.com/axolm)). 
+ diff --git a/docs/changelogs/v20.5.2.7-stable.md b/docs/changelogs/v20.5.2.7-stable.md new file mode 100644 index 00000000000..10b598cb9ba --- /dev/null +++ b/docs/changelogs/v20.5.2.7-stable.md @@ -0,0 +1,33 @@ +### ClickHouse release v20.5.2.7-stable FIXME as compared to v20.5.1.3833-prestable + +#### New Feature +* Add `Alter table drop replica replica_name` support. This fixes [#7080](https://github.com/ClickHouse/ClickHouse/issues/7080). [#10679](https://github.com/ClickHouse/ClickHouse/pull/10679) ([sundyli](https://github.com/sundy-li)). + +#### Improvement +* Add number of errors to ignore while choosing replicas (`distributed_replica_error_ignore`). [#11669](https://github.com/ClickHouse/ClickHouse/pull/11669) ([Azat Khuzhin](https://github.com/azat)). +* Multiversion metadata for storages without structure locks. [#11745](https://github.com/ClickHouse/ClickHouse/pull/11745) ([alesapin](https://github.com/alesapin)). +* Slightly relax the validation of ODBC connection string. If the hostname or username contains only word characters along with `.` and `-`, don't put it into curly braces. It is needed, because some ODBC drivers (e.g. PostgreSQL) don't understand when hostname is enclosed in curly braces. [#11845](https://github.com/ClickHouse/ClickHouse/pull/11845) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `SIGNED` and `UNSIGNED` modifiers of standard integer types (`BIGINT`, `INT`, ...) for compatibility with MySQL. [#11858](https://github.com/ClickHouse/ClickHouse/pull/11858) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow to use `sumWithOverflow` as `SimpleAggregateFunction`. Closes [#8053](https://github.com/ClickHouse/ClickHouse/issues/8053). [#11865](https://github.com/ClickHouse/ClickHouse/pull/11865) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add FixedString support in Hashing functions. [#11878](https://github.com/ClickHouse/ClickHouse/pull/11878) ([flynn](https://github.com/ucasfl)). 
+ +#### Bug Fix +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix memory accounting via HTTP interface (can be significant with `wait_end_of_query=1`). [#11840](https://github.com/ClickHouse/ClickHouse/pull/11840) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#11965](https://github.com/ClickHouse/ClickHouse/issues/11965): Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#11963](https://github.com/ClickHouse/ClickHouse/issues/11963): Use the correct current database for checking access rights after statement `USE database`. [#11920](https://github.com/ClickHouse/ClickHouse/pull/11920) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#12059](https://github.com/ClickHouse/ClickHouse/issues/12059): Fix incorrect comparison of tuples with `Nullable` columns. Fixes [#11985](https://github.com/ClickHouse/ClickHouse/issues/11985). 
[#12039](https://github.com/ClickHouse/ClickHouse/pull/12039) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Build/Testing/Packaging Improvement +* Add simple GitHub hook script for the serverless environment. [#11605](https://github.com/ClickHouse/ClickHouse/pull/11605) ([alesapin](https://github.com/alesapin)). +* Send logs to client on fatal errors if possible. This will make test results more readable. [#11826](https://github.com/ClickHouse/ClickHouse/pull/11826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow ClickHouse to run on Android. [#11894](https://github.com/ClickHouse/ClickHouse/pull/11894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Merging [#10679](https://github.com/ClickHouse/ClickHouse/issues/10679)'. [#11896](https://github.com/ClickHouse/ClickHouse/pull/11896) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Cherry pick [#11875](https://github.com/ClickHouse/ClickHouse/issues/11875) to 20.5: Fix strange and wrong code around DateTime64'. [#11958](https://github.com/ClickHouse/ClickHouse/pull/11958) ([Ivan](https://github.com/abyss7)). + diff --git a/docs/changelogs/v20.5.3.27-stable.md b/docs/changelogs/v20.5.3.27-stable.md new file mode 100644 index 00000000000..58b265243ef --- /dev/null +++ b/docs/changelogs/v20.5.3.27-stable.md @@ -0,0 +1,60 @@ +### ClickHouse release v20.5.3.27-stable FIXME as compared to v20.5.2.7-stable + +#### Improvement +* Moved useless S3 logging to TRACE level. [#12067](https://github.com/ClickHouse/ClickHouse/pull/12067) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Bug Fix +* Backported in [#11967](https://github.com/ClickHouse/ClickHouse/issues/11967): Fix unexpected behaviour of queries like `SELECT *, xyz.*` which were success while an error expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). 
+* Backported in [#11968](https://github.com/ClickHouse/ClickHouse/issues/11968): Fix memory accounting via HTTP interface (can be significant with `wait_end_of_query=1`). [#11840](https://github.com/ClickHouse/ClickHouse/pull/11840) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12354](https://github.com/ClickHouse/ClickHouse/issues/12354): Fixed bug with no moves when changing storage policy from default one. [#11893](https://github.com/ClickHouse/ClickHouse/pull/11893) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fixed bug with no moves when changing storage policy from default one. [#11893](https://github.com/ClickHouse/ClickHouse/pull/11893) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#12244](https://github.com/ClickHouse/ClickHouse/issues/12244): Fix wrong setting name in log message at server startup. [#11997](https://github.com/ClickHouse/ClickHouse/pull/11997) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Partial revokes work correctly in complex cases as well, for example. [#12002](https://github.com/ClickHouse/ClickHouse/pull/12002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#12245](https://github.com/ClickHouse/ClickHouse/issues/12245): Fix potential floating point exception. This closes [#11378](https://github.com/ClickHouse/ClickHouse/issues/11378). [#12005](https://github.com/ClickHouse/ClickHouse/pull/12005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12249](https://github.com/ClickHouse/ClickHouse/issues/12249): Fix potential array size overflow in generateRandom that may lead to crash. This fixes [#11371](https://github.com/ClickHouse/ClickHouse/issues/11371). [#12013](https://github.com/ClickHouse/ClickHouse/pull/12013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#12251](https://github.com/ClickHouse/ClickHouse/issues/12251): A query with function `neighbor` as the only returned expression may return an empty result if the function is called with offset `-9223372036854775808`. This fixes [#11367](https://github.com/ClickHouse/ClickHouse/issues/11367). [#12019](https://github.com/ClickHouse/ClickHouse/pull/12019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12272](https://github.com/ClickHouse/ClickHouse/issues/12272): Do not mention in changelog, because the bug did not come to release. Fix potential crash when doing ORDER BY multiple columns with specified COLLATE on one of the columns when this column is constant. This fixes [#11379](https://github.com/ClickHouse/ClickHouse/issues/11379). The bug was introduced in [#11006](https://github.com/ClickHouse/ClickHouse/issues/11006) in version 20.5. [#12020](https://github.com/ClickHouse/ClickHouse/pull/12020) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12241](https://github.com/ClickHouse/ClickHouse/issues/12241): Fix wrong result and potential crash when invoking function `if` with arguments of type `FixedString` with different sizes. This fixes [#11362](https://github.com/ClickHouse/ClickHouse/issues/11362). [#12021](https://github.com/ClickHouse/ClickHouse/pull/12021) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12230](https://github.com/ClickHouse/ClickHouse/issues/12230): Fix crash in JOIN with LowCardinality type with `join_algorithm=partial_merge`. [#12035](https://github.com/ClickHouse/ClickHouse/pull/12035) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#12231](https://github.com/ClickHouse/ClickHouse/issues/12231): Fix constraints check if constraint is a constant expression. This fixes [#11360](https://github.com/ClickHouse/ClickHouse/issues/11360). 
[#12042](https://github.com/ClickHouse/ClickHouse/pull/12042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12232](https://github.com/ClickHouse/ClickHouse/issues/12232): Make `topK` aggregate function return Enum for Enum types. This fixes [#3740](https://github.com/ClickHouse/ClickHouse/issues/3740). [#12043](https://github.com/ClickHouse/ClickHouse/pull/12043) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12233](https://github.com/ClickHouse/ClickHouse/issues/12233): Parse tables metadata in parallel when loading database. This fixes slow server startup when there is a large number of tables. [#12045](https://github.com/ClickHouse/ClickHouse/pull/12045) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#12234](https://github.com/ClickHouse/ClickHouse/issues/12234): Fix error `Cannot capture column` for higher-order functions with `Tuple(LowCardinality)` argument. Fixes [#9766](https://github.com/ClickHouse/ClickHouse/issues/9766). [#12055](https://github.com/ClickHouse/ClickHouse/pull/12055) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Expected single dictionary argument for function` for function `defaultValueOfArgumentType` with `LowCardinality` type. Fixes [#11808](https://github.com/ClickHouse/ClickHouse/issues/11808). [#12056](https://github.com/ClickHouse/ClickHouse/pull/12056) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12235](https://github.com/ClickHouse/ClickHouse/issues/12235): Fix error `Expected single dictionary argument for function` for function `defaultValueOfArgumentType` with `LowCardinality` type. Fixes [#11808](https://github.com/ClickHouse/ClickHouse/issues/11808). [#12056](https://github.com/ClickHouse/ClickHouse/pull/12056) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#12236](https://github.com/ClickHouse/ClickHouse/issues/12236): Fix possible crash while using wrong type for `PREWHERE`. Fixes [#12053](https://github.com/ClickHouse/ClickHouse/issues/12053), [#12060](https://github.com/ClickHouse/ClickHouse/issues/12060). [#12060](https://github.com/ClickHouse/ClickHouse/pull/12060) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12237](https://github.com/ClickHouse/ClickHouse/issues/12237): Fix SIGSEGV in StorageKafka on DROP TABLE. [#12075](https://github.com/ClickHouse/ClickHouse/pull/12075) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12239](https://github.com/ClickHouse/ClickHouse/issues/12239): Fix empty `result_rows` and `result_bytes` metrics in `system.query_log` for selects. Fixes [#11595](https://github.com/ClickHouse/ClickHouse/issues/11595). [#12089](https://github.com/ClickHouse/ClickHouse/pull/12089) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12240](https://github.com/ClickHouse/ClickHouse/issues/12240): Fix segfault with `-StateResample` combinators. [#12092](https://github.com/ClickHouse/ClickHouse/pull/12092) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#12242](https://github.com/ClickHouse/ClickHouse/issues/12242): Format `Parquet` now properly works with `LowCardinality` and `LowCardinality(Nullable)` types. Fixes [#12086](https://github.com/ClickHouse/ClickHouse/issues/12086), [#8406](https://github.com/ClickHouse/ClickHouse/issues/8406). [#12108](https://github.com/ClickHouse/ClickHouse/pull/12108) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12260](https://github.com/ClickHouse/ClickHouse/issues/12260): Fix handling dependency of table with ENGINE=Dictionary on dictionary. This fixes [#10994](https://github.com/ClickHouse/ClickHouse/issues/10994). This fixes [#10397](https://github.com/ClickHouse/ClickHouse/issues/10397). 
[#12116](https://github.com/ClickHouse/ClickHouse/pull/12116) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#12243](https://github.com/ClickHouse/ClickHouse/issues/12243): Avoid "There is no query" exception for materialized views with joins or with subqueries attached to system logs (system.query_log, metric_log, etc) or to engine=Buffer underlying table. [#12120](https://github.com/ClickHouse/ClickHouse/pull/12120) ([filimonov](https://github.com/filimonov)). +* Backported in [#12392](https://github.com/ClickHouse/ClickHouse/issues/12392): Fix bug which leads to incorrect table metadata in ZooKeeper for ReplicatedVersionedCollapsingMergeTree tables. Fixes [#12093](https://github.com/ClickHouse/ClickHouse/issues/12093). [#12121](https://github.com/ClickHouse/ClickHouse/pull/12121) ([alesapin](https://github.com/alesapin)). +* Backported in [#12246](https://github.com/ClickHouse/ClickHouse/issues/12246): Normalize "pid" file handling. In previous versions the server may refuse to start if it was killed without proper shutdown and if there is another process that has the same pid as the previously run server. Also pid file may be removed in unsuccessful server startup even if there is another server running. This fixes [#3501](https://github.com/ClickHouse/ClickHouse/issues/3501). [#12133](https://github.com/ClickHouse/ClickHouse/pull/12133) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12247](https://github.com/ClickHouse/ClickHouse/issues/12247): Fix potential infinite loop in `greatCircleDistance`, `geoDistance`. This fixes [#12117](https://github.com/ClickHouse/ClickHouse/issues/12117). [#12137](https://github.com/ClickHouse/ClickHouse/pull/12137) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12248](https://github.com/ClickHouse/ClickHouse/issues/12248): Fix potential overflow in integer division. This fixes [#12119](https://github.com/ClickHouse/ClickHouse/issues/12119). 
[#12140](https://github.com/ClickHouse/ClickHouse/pull/12140) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12252](https://github.com/ClickHouse/ClickHouse/issues/12252): Fix bad code in redundant ORDER BY optimization. The bug was introduced in [#10067](https://github.com/ClickHouse/ClickHouse/issues/10067). [#12148](https://github.com/ClickHouse/ClickHouse/pull/12148) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12360](https://github.com/ClickHouse/ClickHouse/issues/12360): Fix transform of query to send to external DBMS (e.g. MySQL, ODBC) in presence of aliases. This fixes [#12032](https://github.com/ClickHouse/ClickHouse/issues/12032). [#12151](https://github.com/ClickHouse/ClickHouse/pull/12151) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12253](https://github.com/ClickHouse/ClickHouse/issues/12253): Fix wrong logic in ALTER DELETE that leads to deleting of records when condition evaluates to NULL. This fixes [#9088](https://github.com/ClickHouse/ClickHouse/issues/9088). This closes [#12106](https://github.com/ClickHouse/ClickHouse/issues/12106). [#12153](https://github.com/ClickHouse/ClickHouse/pull/12153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12259](https://github.com/ClickHouse/ClickHouse/issues/12259): Don't split the dictionary source's table name into schema and table name itself if ODBC connection doesn't support schema. [#12165](https://github.com/ClickHouse/ClickHouse/pull/12165) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#12366](https://github.com/ClickHouse/ClickHouse/issues/12366): Fix dictGet arguments check during GROUP BY injective functions elimination. [#12179](https://github.com/ClickHouse/ClickHouse/pull/12179) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#12367](https://github.com/ClickHouse/ClickHouse/issues/12367): Cap max_memory_usage* limits to the process resident memory. [#12182](https://github.com/ClickHouse/ClickHouse/pull/12182) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12352](https://github.com/ClickHouse/ClickHouse/issues/12352): Fixed logical functions for UInt8 values when they are not equal to 0 or 1. [#12196](https://github.com/ClickHouse/ClickHouse/pull/12196) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#12369](https://github.com/ClickHouse/ClickHouse/issues/12369): Fixed behaviour on reaching redirect limit in request to S3 storage. [#12256](https://github.com/ClickHouse/ClickHouse/pull/12256) ([ianton-ru](https://github.com/ianton-ru)). +* Backported in [#12381](https://github.com/ClickHouse/ClickHouse/issues/12381): Not for changelog. Cherry-pick after [#12196](https://github.com/ClickHouse/ClickHouse/issues/12196). [#12271](https://github.com/ClickHouse/ClickHouse/pull/12271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12370](https://github.com/ClickHouse/ClickHouse/issues/12370): Implement conversions to the common type for LowCardinality types. This allows to execute UNION ALL of tables with columns of LowCardinality and other columns. This fixes [#8212](https://github.com/ClickHouse/ClickHouse/issues/8212). This fixes [#4342](https://github.com/ClickHouse/ClickHouse/issues/4342). [#12275](https://github.com/ClickHouse/ClickHouse/pull/12275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12368](https://github.com/ClickHouse/ClickHouse/issues/12368): The function `arrayFill` worked incorrectly for empty arrays that may lead to crash. This fixes [#12263](https://github.com/ClickHouse/ClickHouse/issues/12263). [#12279](https://github.com/ClickHouse/ClickHouse/pull/12279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#12362](https://github.com/ClickHouse/ClickHouse/issues/12362): Fix typo in setting name. [#12292](https://github.com/ClickHouse/ClickHouse/pull/12292) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12363](https://github.com/ClickHouse/ClickHouse/issues/12363): Some threads might randomly hang for a few seconds during DNS cache updating. It's fixed. [#12296](https://github.com/ClickHouse/ClickHouse/pull/12296) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#12364](https://github.com/ClickHouse/ClickHouse/issues/12364): Fix TTL after renaming column, on which depends TTL expression. [#12304](https://github.com/ClickHouse/ClickHouse/pull/12304) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#12365](https://github.com/ClickHouse/ClickHouse/issues/12365): Avoid "bad cast" exception when there is an expression that filters data by virtual columns (like `_table` in `Merge` tables) or by "index" columns in system tables such as filtering by database name when querying from `system.tables`, and this expression returns `Nullable` type. This fixes [#12166](https://github.com/ClickHouse/ClickHouse/issues/12166). [#12305](https://github.com/ClickHouse/ClickHouse/pull/12305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12375](https://github.com/ClickHouse/ClickHouse/issues/12375): Fix order of columns in `WITH FILL` modifier. Previously order of columns of `ORDER BY` statement wasn't respected. [#12306](https://github.com/ClickHouse/ClickHouse/pull/12306) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#12385](https://github.com/ClickHouse/ClickHouse/issues/12385): Fix very rare race condition in ReplicatedMergeTreeQueue. [#12315](https://github.com/ClickHouse/ClickHouse/pull/12315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#12254](https://github.com/ClickHouse/ClickHouse/issues/12254): Install `ca-certificates` before the first `apt-get update` in Dockerfile. [#12095](https://github.com/ClickHouse/ClickHouse/pull/12095) ([Ivan Blinkov](https://github.com/blinkov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Cherry pick [#12056](https://github.com/ClickHouse/ClickHouse/issues/12056) to 20.5: Fix defaultValueOfArgumentType'. [#12205](https://github.com/ClickHouse/ClickHouse/pull/12205) ([robot-clickhouse](https://github.com/robot-clickhouse)). + diff --git a/docs/changelogs/v20.5.4.40-stable.md b/docs/changelogs/v20.5.4.40-stable.md new file mode 100644 index 00000000000..7dbf555ad28 --- /dev/null +++ b/docs/changelogs/v20.5.4.40-stable.md @@ -0,0 +1,32 @@ +### ClickHouse release v20.5.4.40-stable FIXME as compared to v20.5.3.27-stable + +#### Performance Improvement +* Backported in [#12929](https://github.com/ClickHouse/ClickHouse/issues/12929): Fix "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574) Index not used for IN operator with literals", performance regression introduced around v19.3. [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)). + +#### Bug Fix +* Backported in [#12830](https://github.com/ClickHouse/ClickHouse/issues/12830): Fix performance for selects with `UNION` caused by wrong limit for the total number of threads. Fixes [#12030](https://github.com/ClickHouse/ClickHouse/issues/12030). [#12103](https://github.com/ClickHouse/ClickHouse/pull/12103) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12836](https://github.com/ClickHouse/ClickHouse/issues/12836): Fixed the behaviour when `SummingMergeTree` engine sums up columns from partition key. Added an exception in case of explicit definition of columns to sum which intersects with partition key columns. 
This fixes [#7867](https://github.com/ClickHouse/ClickHouse/issues/7867). [#12173](https://github.com/ClickHouse/ClickHouse/pull/12173) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#12833](https://github.com/ClickHouse/ClickHouse/issues/12833): Fixed the behaviour when during multiple sequential inserts in `StorageFile` header for some special types was written more than once. This fixed [#6155](https://github.com/ClickHouse/ClickHouse/issues/6155). [#12197](https://github.com/ClickHouse/ClickHouse/pull/12197) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#12895](https://github.com/ClickHouse/ClickHouse/issues/12895): kafka: fix SIGSEGV if there is a message with error in the middle of the batch. [#12302](https://github.com/ClickHouse/ClickHouse/pull/12302) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12905](https://github.com/ClickHouse/ClickHouse/issues/12905): Fix TOTALS/ROLLUP/CUBE for aggregate functions with `-State` and `Nullable` arguments. This fixes [#12163](https://github.com/ClickHouse/ClickHouse/issues/12163). [#12376](https://github.com/ClickHouse/ClickHouse/pull/12376) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12907](https://github.com/ClickHouse/ClickHouse/issues/12907): Allow to CLEAR column even if there are depending DEFAULT expressions. This fixes [#12333](https://github.com/ClickHouse/ClickHouse/issues/12333). [#12378](https://github.com/ClickHouse/ClickHouse/pull/12378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12909](https://github.com/ClickHouse/ClickHouse/issues/12909): Avoid exception when negative or floating point constant is used in WHERE condition for indexed tables. This fixes [#11905](https://github.com/ClickHouse/ClickHouse/issues/11905). [#12384](https://github.com/ClickHouse/ClickHouse/pull/12384) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#12897](https://github.com/ClickHouse/ClickHouse/issues/12897): Fix crash in JOIN with dictionary when we are joining over expression of dictionary key: `t JOIN dict ON expr(dict.id) = t.id`. Disable dictionary join optimisation for this case. [#12458](https://github.com/ClickHouse/ClickHouse/pull/12458) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#13004](https://github.com/ClickHouse/ClickHouse/issues/13004): Fixed performance issue, while reading from compact parts. [#12492](https://github.com/ClickHouse/ClickHouse/pull/12492) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#12892](https://github.com/ClickHouse/ClickHouse/issues/12892): Fixing race condition in live view tables which could cause data duplication. [#12519](https://github.com/ClickHouse/ClickHouse/pull/12519) ([vzakaznikov](https://github.com/vzakaznikov)). +* Backported in [#12888](https://github.com/ClickHouse/ClickHouse/issues/12888): Now ClickHouse will recalculate checksums for parts when file `checksums.txt` is absent. Broken since [#9827](https://github.com/ClickHouse/ClickHouse/issues/9827). [#12545](https://github.com/ClickHouse/ClickHouse/pull/12545) ([alesapin](https://github.com/alesapin)). +* Backported in [#12873](https://github.com/ClickHouse/ClickHouse/issues/12873): Fix error `Output of TreeExecutor is not sorted` for `OPTIMIZE DEDUPLICATE`. Fixes [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572). [#12613](https://github.com/ClickHouse/ClickHouse/pull/12613) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12885](https://github.com/ClickHouse/ClickHouse/issues/12885): Fix possible `Pipeline stuck` error for queries with external sorting. Fixes [#12617](https://github.com/ClickHouse/ClickHouse/issues/12617). [#12618](https://github.com/ClickHouse/ClickHouse/pull/12618) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#12876](https://github.com/ClickHouse/ClickHouse/issues/12876): Better exception message in disk access storage. [#12625](https://github.com/ClickHouse/ClickHouse/pull/12625) ([alesapin](https://github.com/alesapin)). +* Backported in [#12882](https://github.com/ClickHouse/ClickHouse/issues/12882): Exception `There is no supertype...` can be thrown during `ALTER ... UPDATE` in unexpected cases (e.g. when subtracting from UInt64 column). This fixes [#7306](https://github.com/ClickHouse/ClickHouse/issues/7306). This fixes [#4165](https://github.com/ClickHouse/ClickHouse/issues/4165). [#12633](https://github.com/ClickHouse/ClickHouse/pull/12633) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12879](https://github.com/ClickHouse/ClickHouse/issues/12879): Add support for function `if` with `Array(UUID)` arguments. This fixes [#11066](https://github.com/ClickHouse/ClickHouse/issues/11066). [#12648](https://github.com/ClickHouse/ClickHouse/pull/12648) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12972](https://github.com/ClickHouse/ClickHouse/issues/12972): Fix SIGSEGV in StorageKafka when broker is unavailable (and not only). [#12658](https://github.com/ClickHouse/ClickHouse/pull/12658) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12860](https://github.com/ClickHouse/ClickHouse/issues/12860): fixes [#10572](https://github.com/ClickHouse/ClickHouse/issues/10572) fix bloom filter index with const expression. [#12659](https://github.com/ClickHouse/ClickHouse/pull/12659) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#12869](https://github.com/ClickHouse/ClickHouse/issues/12869): fixes [#12293](https://github.com/ClickHouse/ClickHouse/issues/12293) allow push predicate when subquery contains with clause. [#12663](https://github.com/ClickHouse/ClickHouse/pull/12663) ([Winter Zhang](https://github.com/zhang2014)). 
+* Backported in [#12866](https://github.com/ClickHouse/ClickHouse/issues/12866): Fix memory tracking for input_format_parallel_parsing (by attaching thread to group). [#12672](https://github.com/ClickHouse/ClickHouse/pull/12672) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13185](https://github.com/ClickHouse/ClickHouse/issues/13185): Fix performance with large tuples, which are interpreted as functions in `IN` section. This is the case when the user writes `WHERE x IN tuple(1, 2, ...)` instead of `WHERE x IN (1, 2, ...)` for some obscure reason. [#12700](https://github.com/ClickHouse/ClickHouse/pull/12700) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13032](https://github.com/ClickHouse/ClickHouse/issues/13032): Corrected merge_with_ttl_timeout logic which did not work well when expiration affected more than one partition over one time interval. (Authored by @excitoon). [#12982](https://github.com/ClickHouse/ClickHouse/pull/12982) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#13047](https://github.com/ClickHouse/ClickHouse/issues/13047): Fix `Block structure mismatch` error for queries with `UNION` and `JOIN`. Fixes [#12602](https://github.com/ClickHouse/ClickHouse/issues/12602). [#12989](https://github.com/ClickHouse/ClickHouse/pull/12989) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13050](https://github.com/ClickHouse/ClickHouse/issues/13050): Fix crash which was possible for queries with `ORDER BY` tuple and small `LIMIT`. Fixes [#12623](https://github.com/ClickHouse/ClickHouse/issues/12623). [#13009](https://github.com/ClickHouse/ClickHouse/pull/13009) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13144](https://github.com/ClickHouse/ClickHouse/issues/13144): Fix wrong index analysis with functions. It could lead to pruning wrong parts, while reading from `MergeTree` tables. Fixes [#13060](https://github.com/ClickHouse/ClickHouse/issues/13060). 
Fixes [#12406](https://github.com/ClickHouse/ClickHouse/issues/12406). [#13081](https://github.com/ClickHouse/ClickHouse/pull/13081) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v20.5.5.74-stable.md b/docs/changelogs/v20.5.5.74-stable.md new file mode 100644 index 00000000000..0f2d5f7aab1 --- /dev/null +++ b/docs/changelogs/v20.5.5.74-stable.md @@ -0,0 +1,34 @@ +### ClickHouse release v20.5.5.74-stable FIXME as compared to v20.5.4.40-stable + +#### Improvement +* Backported in [#13920](https://github.com/ClickHouse/ClickHouse/issues/13920): Fix data race in `lgamma` function. This race was caught only in `tsan`, no side effects actually happened. [#13842](https://github.com/ClickHouse/ClickHouse/pull/13842) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#13300](https://github.com/ClickHouse/ClickHouse/issues/13300): The function `groupArrayMoving*` was not working for distributed queries. Its result was calculated within incorrect data type (without promotion to the largest type). The function `groupArrayMovingAvg` was returning integer number that was inconsistent with the `avg` function. This fixes [#12568](https://github.com/ClickHouse/ClickHouse/issues/12568). [#12622](https://github.com/ClickHouse/ClickHouse/pull/12622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13030](https://github.com/ClickHouse/ClickHouse/issues/13030): CREATE USER IF NOT EXISTS now doesn't throw exception if the user exists. This fixes [#12507](https://github.com/ClickHouse/ClickHouse/issues/12507). [#12646](https://github.com/ClickHouse/ClickHouse/pull/12646) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#12996](https://github.com/ClickHouse/ClickHouse/issues/12996): Fix optimization `optimize_move_functions_out_of_any=1` in case of `any(func())`. [#12664](https://github.com/ClickHouse/ClickHouse/pull/12664) ([Artem Zuikov](https://github.com/4ertus2)).
+* Backported in [#13092](https://github.com/ClickHouse/ClickHouse/issues/13092): Fix CAST(Nullable(String), Enum()). [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12984](https://github.com/ClickHouse/ClickHouse/issues/12984): Fix columns duplication for range hashed dictionary created from DDL query. This fixes [#10605](https://github.com/ClickHouse/ClickHouse/issues/10605). [#12857](https://github.com/ClickHouse/ClickHouse/pull/12857) ([alesapin](https://github.com/alesapin)). +* Backported in [#13561](https://github.com/ClickHouse/ClickHouse/issues/13561): Fix access to redis dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13508](https://github.com/ClickHouse/ClickHouse/issues/13508): Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#13358](https://github.com/ClickHouse/ClickHouse/issues/13358): AvroConfluent: Skip Kafka tombstone records AvroConfluent: Support skipping broken records ... [#13203](https://github.com/ClickHouse/ClickHouse/pull/13203) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Backported in [#13223](https://github.com/ClickHouse/ClickHouse/issues/13223): Fix DateTime64 conversion functions with constant argument. [#13205](https://github.com/ClickHouse/ClickHouse/pull/13205) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#13244](https://github.com/ClickHouse/ClickHouse/issues/13244): Fix assert in `arrayElement` function in case of array elements are Nullable and array subscript is also Nullable. This fixes [#12172](https://github.com/ClickHouse/ClickHouse/issues/12172). [#13224](https://github.com/ClickHouse/ClickHouse/pull/13224) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13268](https://github.com/ClickHouse/ClickHouse/issues/13268): Fix function if with nullable constexpr as cond that is not literal NULL. Fixes [#12463](https://github.com/ClickHouse/ClickHouse/issues/12463). [#13226](https://github.com/ClickHouse/ClickHouse/pull/13226) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13351](https://github.com/ClickHouse/ClickHouse/issues/13351): Return passed number for numbers with MSB set in roundUpToPowerOfTwoOrZero(). [#13234](https://github.com/ClickHouse/ClickHouse/pull/13234) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13299](https://github.com/ClickHouse/ClickHouse/issues/13299): Fix potentially low performance and slightly incorrect result for `uniqExact`, `topK`, `sumDistinct` and similar aggregate functions called on Float types with NaN values. It also triggered assert in debug build. This fixes [#12491](https://github.com/ClickHouse/ClickHouse/issues/12491). [#13254](https://github.com/ClickHouse/ClickHouse/pull/13254) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13315](https://github.com/ClickHouse/ClickHouse/issues/13315): The server may crash if user passed specifically crafted arguments to the function `h3ToChildren`. This fixes [#13275](https://github.com/ClickHouse/ClickHouse/issues/13275). [#13277](https://github.com/ClickHouse/ClickHouse/pull/13277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#13328](https://github.com/ClickHouse/ClickHouse/issues/13328): Fix possible error `Totals having transform was already added to pipeline` in case of a query from delayed replica. [#13290](https://github.com/ClickHouse/ClickHouse/pull/13290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13609](https://github.com/ClickHouse/ClickHouse/issues/13609): Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13482](https://github.com/ClickHouse/ClickHouse/issues/13482): Fix queries with constant columns and `ORDER BY` prefix of primary key. [#13396](https://github.com/ClickHouse/ClickHouse/pull/13396) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13488](https://github.com/ClickHouse/ClickHouse/issues/13488): Fix empty output for `Arrow` and `Parquet` formats if the query returns zero rows. It was done because empty output is not valid for these formats. [#13399](https://github.com/ClickHouse/ClickHouse/pull/13399) ([hcz](https://github.com/hczhcz)). +* Backported in [#13569](https://github.com/ClickHouse/ClickHouse/issues/13569): Fix possible race in `StorageMemory`. https://clickhouse-test-reports.s3.yandex.net/0/9cac8a7244063d2092ad25d45502611e18d3749c/stress_test_(thread)/stderr.log Have no idea how to write a test. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13442](https://github.com/ClickHouse/ClickHouse/issues/13442): Fix `aggregate function any(x) is found inside another aggregate function in query` error with `SET optimize_move_functions_out_of_any = 1` and aliases inside `any()`. [#13419](https://github.com/ClickHouse/ClickHouse/pull/13419) ([Artem Zuikov](https://github.com/4ertus2)).
+* Backported in [#13486](https://github.com/ClickHouse/ClickHouse/issues/13486): Fix invalid return type for comparison of tuples with `NULL` elements. Fixes [#12461](https://github.com/ClickHouse/ClickHouse/issues/12461). [#13420](https://github.com/ClickHouse/ClickHouse/pull/13420) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13463](https://github.com/ClickHouse/ClickHouse/issues/13463): Fix error in `parseDateTimeBestEffort` function when unix timestamp was passed as an argument. This fixes [#13362](https://github.com/ClickHouse/ClickHouse/issues/13362). [#13441](https://github.com/ClickHouse/ClickHouse/pull/13441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13663](https://github.com/ClickHouse/ClickHouse/issues/13663): Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#13717](https://github.com/ClickHouse/ClickHouse/issues/13717): Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#13701](https://github.com/ClickHouse/ClickHouse/issues/13701): Do not optimize any(arrayJoin()) -> arrayJoin() under optimize_move_functions_out_of_any. [#13681](https://github.com/ClickHouse/ClickHouse/pull/13681) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13732](https://github.com/ClickHouse/ClickHouse/issues/13732): Fix incorrect message in `clickhouse-server.init` while checking user and group. [#13711](https://github.com/ClickHouse/ClickHouse/pull/13711) ([ylchou](https://github.com/ylchou)). +* Backported in [#13903](https://github.com/ClickHouse/ClickHouse/issues/13903): Fix incorrect sorting for `FixedString` columns. 
Fixes [#13182](https://github.com/ClickHouse/ClickHouse/issues/13182). [#13887](https://github.com/ClickHouse/ClickHouse/pull/13887) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v20.6.1.4066-prestable.md b/docs/changelogs/v20.6.1.4066-prestable.md new file mode 100644 index 00000000000..523709c2be5 --- /dev/null +++ b/docs/changelogs/v20.6.1.4066-prestable.md @@ -0,0 +1,184 @@ +### ClickHouse release v20.6.1.4066-prestable FIXME as compared to v20.5.1.3833-prestable + +#### Backward Incompatible Change +* `clickhouse-local` now uses a unique temporary data directory by default, not the current directory as before. If needed, the data directory can be explicitly specified with the `-- --path` option. [#11931](https://github.com/ClickHouse/ClickHouse/pull/11931) ([Alexander Kuzmenkov](https://github.com/akuzm)). + +#### New Feature +* Add `Alter table drop replica replica_name` support. This fixes [#7080](https://github.com/ClickHouse/ClickHouse/issues/7080). [#10679](https://github.com/ClickHouse/ClickHouse/pull/10679) ([sundyli](https://github.com/sundy-li)). +* Added new in-memory format of parts in `MergeTree`-family tables, which stores data in memory. Parts are written on disk at first merge. Part will be created in in-memory format if its size in rows or bytes is below thresholds `min_rows_for_compact_part` and `min_bytes_for_compact_part`. Also optional support of Write-Ahead-Log is available, which is enabled by default and is controlled by setting `in_memory_parts_enable_wal`. [#10697](https://github.com/ClickHouse/ClickHouse/pull/10697) ([Anton Popov](https://github.com/CurtizJ)). +* Add -Distinct combinator for aggregate functions. [#10930](https://github.com/ClickHouse/ClickHouse/pull/10930) ([Sofia Antipushina](https://github.com/Sonichka1311)). +* Support table engine mongo(host:port, database, collection, user, password).
[#10931](https://github.com/ClickHouse/ClickHouse/pull/10931) ([ageraab](https://github.com/ageraab)). +* Add storage RabbitMQ. [#11069](https://github.com/ClickHouse/ClickHouse/pull/11069) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Opt-in settings to send crash reports to the ClickHouse core team via [Sentry](https://sentry.io). [#11300](https://github.com/ClickHouse/ClickHouse/pull/11300) ([Ivan Blinkov](https://github.com/blinkov)). +* Add ORCBlockOutputFormat. [#11662](https://github.com/ClickHouse/ClickHouse/pull/11662) ([Kruglov Pavel](https://github.com/Avogar)). +* `max_thread_pool_size` config for changing the maximum number of Threads in Global Thread Pool. [#11668](https://github.com/ClickHouse/ClickHouse/pull/11668) ([Bharat Nallan](https://github.com/bharatnc)). +* Initial implementation of `EXPLAIN` query. Syntax: `EXPLAIN SELECT ...`. This fixes [#1118](https://github.com/ClickHouse/ClickHouse/issues/1118). [#11873](https://github.com/ClickHouse/ClickHouse/pull/11873) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Switched paths in S3 metadata to relative which allows to handle S3 blobs more easily. [#11892](https://github.com/ClickHouse/ClickHouse/pull/11892) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Avro UUID input/output support. [#11954](https://github.com/ClickHouse/ClickHouse/pull/11954) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Added read-only MongoDB table engine. Allows reading flat (primitive types, not nested) fields. [#11983](https://github.com/ClickHouse/ClickHouse/pull/11983) ([alesapin](https://github.com/alesapin)). +* Add setting to fields not found in Avro schema. [#12007](https://github.com/ClickHouse/ClickHouse/pull/12007) ([Andrew Onyshchuk](https://github.com/oandrew)). +* add function `parseDateTimeBestEffortUS`. [#12028](https://github.com/ClickHouse/ClickHouse/pull/12028) ([flynn](https://github.com/ucasfl)). +* #4006 Support ALTER TABLE ... [ADD|MODIFY] COLUMN ... FIRST. 
[#12073](https://github.com/ClickHouse/ClickHouse/pull/12073) ([Winter Zhang](https://github.com/zhang2014)). +* Add a function initializedAggregation to initialize an aggregation based on a single value. [#12109](https://github.com/ClickHouse/ClickHouse/pull/12109) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Support RIGHT and FULL JOIN with `set join_algorithm=partial_merge`. Only ALL strictness is supported (ANY, SEMI, ANTI, ASOF are not). [#12118](https://github.com/ClickHouse/ClickHouse/pull/12118) ([Artem Zuikov](https://github.com/4ertus2)). +* Implementation of PostgreSQL-like ILIKE operator for [#11710](https://github.com/ClickHouse/ClickHouse/issues/11710). [#12125](https://github.com/ClickHouse/ClickHouse/pull/12125) ([Mike Kot](https://github.com/myrrc)). +* Allow Nullable types as keys in MergeTree tables. [#5319](https://github.com/ClickHouse/ClickHouse/issues/5319). [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) ([Amos Bird](https://github.com/amosbird)). + +#### Performance Improvement +* Allow to use direct_io and mmap_io for secondary indices if the settings `min_bytes_to_use_direct_io` or `min_bytes_to_use_mmap_io` are configured. [#11955](https://github.com/ClickHouse/ClickHouse/pull/11955) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix estimation of the number of marks while reading from MergeTree. This is needed to correctly handle the settings `merge_tree_max_rows_to_use_cache`, `merge_tree_max_bytes_to_use_cache`, `merge_tree_min_rows_for_concurrent_read`, `merge_tree_min_bytes_for_concurrent_read`, `merge_tree_min_rows_for_seek`, `merge_tree_min_bytes_for_seek`. Now settings `min_bytes_to_use_mmap_io` also applied to read index and compact parts in MergeTree table engines family. [#11970](https://github.com/ClickHouse/ClickHouse/pull/11970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574) Index not used for IN operator with literals", performance regression introduced around v19.3. [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)). +* Remove injective functions inside `uniq*()` if `set optimize_injective_functions_inside_uniq=1`. [#12337](https://github.com/ClickHouse/ClickHouse/pull/12337) ([Artem Zuikov](https://github.com/4ertus2)). +* Add order by optimisation that rewrites `ORDER BY x, f(x)` with `ORDER by x` if `set optimize_redundant_functions_in_order_by = 1`. [#12404](https://github.com/ClickHouse/ClickHouse/pull/12404) ([Artem Zuikov](https://github.com/4ertus2)). +* Lower memory usage for some operations up to 2 times. [#12424](https://github.com/ClickHouse/ClickHouse/pull/12424) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Add number of errors to ignore while choosing replicas (`distributed_replica_error_ignore`). [#11669](https://github.com/ClickHouse/ClickHouse/pull/11669) ([Azat Khuzhin](https://github.com/azat)). +* Improved performance of 'ORDER BY' and 'GROUP BY' by prefix of sorting key. [#11696](https://github.com/ClickHouse/ClickHouse/pull/11696) ([Anton Popov](https://github.com/CurtizJ)). +* - Add `optimize_skip_unused_shards_nesting` (allows control nesting level for shards skipping optimization) - Add `force_skip_optimize_shards_nesting` (allows control nesting level for checking was shards skipped or not) - Deprecate `force_optimize_skip_unused_shards_no_nested` (`force_skip_optimize_shards_nesting` should be used instead) - Disable `optimize_skip_unused_shards` if sharding_key has non-deterministic func (i.e. `rand()`, note that this does not change anything for INSERT side). [#11715](https://github.com/ClickHouse/ClickHouse/pull/11715) ([Azat Khuzhin](https://github.com/azat)). +* Multiversion metadata for storages without structure locks.
[#11745](https://github.com/ClickHouse/ClickHouse/pull/11745) ([alesapin](https://github.com/alesapin)). +* Slightly relax the validation of ODBC connection string. If the hostname or username contains only word characters along with `.` and `-`, don't put it into curly braces. It is needed, because some ODBC drivers (e.g. PostgreSQL) don't understand when hostname is enclosed in curly braces. [#11845](https://github.com/ClickHouse/ClickHouse/pull/11845) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support parse UUID without separator(separators are always removed in most implementations, this is helpful for users to write data). [#11856](https://github.com/ClickHouse/ClickHouse/pull/11856) ([Winter Zhang](https://github.com/zhang2014)). +* Support `SIGNED` and `UNSIGNED` modifiers of standard integer types (`BIGINT`, `INT`, ...) for compatibility with MySQL. [#11858](https://github.com/ClickHouse/ClickHouse/pull/11858) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow to use `sumWithOverflow` as `SimpleAggregateFunction`. Closes [#8053](https://github.com/ClickHouse/ClickHouse/issues/8053). [#11865](https://github.com/ClickHouse/ClickHouse/pull/11865) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add FixedString support in Hashing functions. [#11878](https://github.com/ClickHouse/ClickHouse/pull/11878) ([flynn](https://github.com/ucasfl)). +* Rewrite code for `optimize_arithmetic_operations_in_aggregate_functions` optimisation. [#11899](https://github.com/ClickHouse/ClickHouse/pull/11899) ([Artem Zuikov](https://github.com/4ertus2)). +* Improve path concatenation and fix double slashed paths using std::filesystem::path instead of std::string in `DatabaseOrdinary.cpp`. [#11900](https://github.com/ClickHouse/ClickHouse/pull/11900) ([Bharat Nallan](https://github.com/bharatnc)). +* Deprecate the old regular style and use the new globalVariable method [#11832](https://github.com/ClickHouse/ClickHouse/issues/11832). 
[#11901](https://github.com/ClickHouse/ClickHouse/pull/11901) ([BohuTANG](https://github.com/BohuTANG)). +* related to [issue 9797](https://github.com/ClickHouse/ClickHouse/issues/9797). [#11923](https://github.com/ClickHouse/ClickHouse/pull/11923) ([flynn](https://github.com/ucasfl)). +* `system.tables` now considers column capacities for Memory and Buffer table engines, which is better approximation for resident memory size. [#11935](https://github.com/ClickHouse/ClickHouse/pull/11935) ([Max Akhmedov](https://github.com/zlobober)). +* Add CPU frequencies to system.asynchronous_metrics. Make the metric collection period configurable. [#11972](https://github.com/ClickHouse/ClickHouse/pull/11972) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allow to perform "metadata-only" ALTER of partition key such as adding more elements to Enum data type. This fixes [#7513](https://github.com/ClickHouse/ClickHouse/issues/7513). [#11973](https://github.com/ClickHouse/ClickHouse/pull/11973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add replica priority for load_balancing (for manual prioritization of the load balancing). [#11995](https://github.com/ClickHouse/ClickHouse/pull/11995) ([Azat Khuzhin](https://github.com/azat)). +* Support MySQL engine reading Enums type [#3985](https://github.com/ClickHouse/ClickHouse/issues/3985). [#11996](https://github.com/ClickHouse/ClickHouse/pull/11996) ([BohuTANG](https://github.com/BohuTANG)). +* Implemented single part uploads for DiskS3. [#12026](https://github.com/ClickHouse/ClickHouse/pull/12026) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Moved useless S3 logging to TRACE level. [#12067](https://github.com/ClickHouse/ClickHouse/pull/12067) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Improves `REVOKE` command: now it requires grant/admin option for only access which will be revoked. 
For example, to execute `REVOKE ALL ON *.* FROM user1` now it doesn't require to have full access rights granted with grant option. Added command `REVOKE ALL FROM user1` - it revokes all granted roles from `user1`. [#12083](https://github.com/ClickHouse/ClickHouse/pull/12083) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add 'type' column in system.disks. [#12115](https://github.com/ClickHouse/ClickHouse/pull/12115) ([ianton-ru](https://github.com/ianton-ru)). +* Added support for `%g` (two digit ISO year) and `%G` (four digit ISO year) substitutions in `formatDateTime` function. [#12136](https://github.com/ClickHouse/ClickHouse/pull/12136) ([vivarum](https://github.com/vivarum)). +* Add `KILL QUERY [connection_id]` for the MySQL client/driver to cancel the long query, issue [#12038](https://github.com/ClickHouse/ClickHouse/issues/12038). [#12152](https://github.com/ClickHouse/ClickHouse/pull/12152) ([BohuTANG](https://github.com/BohuTANG)). +* 1. Support MySQL 'SELECT DATABASE()' [#9336](https://github.com/ClickHouse/ClickHouse/issues/9336) 2. Add MySQL replacement query integration test. [#12314](https://github.com/ClickHouse/ClickHouse/pull/12314) ([BohuTANG](https://github.com/BohuTANG)). +* This setting allows to choose charset for printing grids (either utf8 or ascii). [#12372](https://github.com/ClickHouse/ClickHouse/pull/12372) ([Sabyanin Maxim](https://github.com/s-mx)). +* Write the detail exception message to the client instead of 'MySQL server has gone away'. [#12383](https://github.com/ClickHouse/ClickHouse/pull/12383) ([BohuTANG](https://github.com/BohuTANG)). +* lifetime_rows/lifetime_bytes for Buffer engine. [#12421](https://github.com/ClickHouse/ClickHouse/pull/12421) ([Azat Khuzhin](https://github.com/azat)). +* Use correct default secure port for clickhouse-benchmark with `--secure` argument. This fixes [#11044](https://github.com/ClickHouse/ClickHouse/issues/11044).
[#12440](https://github.com/ClickHouse/ClickHouse/pull/12440) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix memory accounting via HTTP interface (can be significant with `wait_end_of_query=1`). [#11840](https://github.com/ClickHouse/ClickHouse/pull/11840) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed bug with no moves when changing storage policy from default one. [#11893](https://github.com/ClickHouse/ClickHouse/pull/11893) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix rare crash caused by using Nullable column in prewhere condition. Continuation of [#11869](https://github.com/ClickHouse/ClickHouse/issues/11869). [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Keep aliases for substitutions in query (parametrized queries). This fixes [#11914](https://github.com/ClickHouse/ClickHouse/issues/11914). [#11916](https://github.com/ClickHouse/ClickHouse/pull/11916) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix uninitialized memory in partitions serialization. [#11919](https://github.com/ClickHouse/ClickHouse/pull/11919) ([alesapin](https://github.com/alesapin)). +* Use the correct current database for checking access rights after statement `USE database`. [#11920](https://github.com/ClickHouse/ClickHouse/pull/11920) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed `Query parameter was not set` in `Values` format. Fixes [#11918](https://github.com/ClickHouse/ClickHouse/issues/11918). [#11936](https://github.com/ClickHouse/ClickHouse/pull/11936) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race condition in extractAllGroups* functions. [#11949](https://github.com/ClickHouse/ClickHouse/pull/11949) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make mmap IO work again (experimental). Continuation of [#8520](https://github.com/ClickHouse/ClickHouse/issues/8520). [#11953](https://github.com/ClickHouse/ClickHouse/pull/11953) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong setting name in log message at server startup. [#11997](https://github.com/ClickHouse/ClickHouse/pull/11997) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Partial revokes work correctly in complex cases as well, for example. [#12002](https://github.com/ClickHouse/ClickHouse/pull/12002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix potential floating point exception. This closes [#11378](https://github.com/ClickHouse/ClickHouse/issues/11378). [#12005](https://github.com/ClickHouse/ClickHouse/pull/12005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid returning wrong number of geohashes in function `geoHashesInBox` due to accumulation of floating point error. This fixes [#11369](https://github.com/ClickHouse/ClickHouse/issues/11369). [#12006](https://github.com/ClickHouse/ClickHouse/pull/12006) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix potential array size overflow in generateRandom that may lead to crash. This fixes [#11371](https://github.com/ClickHouse/ClickHouse/issues/11371). [#12013](https://github.com/ClickHouse/ClickHouse/pull/12013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix calculation of access rights when allow_ddl=0. [#12015](https://github.com/ClickHouse/ClickHouse/pull/12015) ([Vitaly Baranov](https://github.com/vitlibar)). +* When adding floating point number of intervals to date/datetime, the result may be calculated incorrectly. This fixes [#11377](https://github.com/ClickHouse/ClickHouse/issues/11377). [#12018](https://github.com/ClickHouse/ClickHouse/pull/12018) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A query with function `neighbor` as the only returned expression may return empty result if the function is called with offset `-9223372036854775808`. This fixes [#11367](https://github.com/ClickHouse/ClickHouse/issues/11367). [#12019](https://github.com/ClickHouse/ClickHouse/pull/12019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not mention in changelog, because the bug did not come to release. Fix potential crash when doing ORDER BY multiple columns with specified COLLATE on one of the column when this column is constant. This fixes [#11379](https://github.com/ClickHouse/ClickHouse/issues/11379). The bug was introduced in [#11006](https://github.com/ClickHouse/ClickHouse/issues/11006) in version 20.5. [#12020](https://github.com/ClickHouse/ClickHouse/pull/12020) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result and potential crash when invoking function `if` with arguments of type `FixedString` with different sizes. This fixes [#11362](https://github.com/ClickHouse/ClickHouse/issues/11362). [#12021](https://github.com/ClickHouse/ClickHouse/pull/12021) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix calculation of access rights when allow_introspection_functions=0. [#12031](https://github.com/ClickHouse/ClickHouse/pull/12031) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash in JOIN with LowCardinality type with `join_algorithm=partial_merge`. [#12035](https://github.com/ClickHouse/ClickHouse/pull/12035) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix incorrect comparison of tuples with `Nullable` columns. Fixes [#11985](https://github.com/ClickHouse/ClickHouse/issues/11985). [#12039](https://github.com/ClickHouse/ClickHouse/pull/12039) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix constraints check if constraint is a constant expression. This fixes [#11360](https://github.com/ClickHouse/ClickHouse/issues/11360). [#12042](https://github.com/ClickHouse/ClickHouse/pull/12042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `topK` aggregate function return Enum for Enum types. This fixes [#3740](https://github.com/ClickHouse/ClickHouse/issues/3740). [#12043](https://github.com/ClickHouse/ClickHouse/pull/12043) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parse tables metadata in parallel when loading database. This fixes slow server startup when there are large number of tables. [#12045](https://github.com/ClickHouse/ClickHouse/pull/12045) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error `Cannot capture column` for higher-order functions with `Tuple(LowCardinality)` argument. Fixes [#9766](https://github.com/ClickHouse/ClickHouse/issues/9766). [#12055](https://github.com/ClickHouse/ClickHouse/pull/12055) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Expected single dictionary argument for function` for function `defaultValueOfArgumentType` with `LowCardinality` type. Fixes [#11808](https://github.com/ClickHouse/ClickHouse/issues/11808). 
[#12056](https://github.com/ClickHouse/ClickHouse/pull/12056) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible crash while using wrong type for `PREWHERE`. Fixes [#12053](https://github.com/ClickHouse/ClickHouse/issues/12053), [#12060](https://github.com/ClickHouse/ClickHouse/issues/12060). [#12060](https://github.com/ClickHouse/ClickHouse/pull/12060) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV in StorageKafka on DROP TABLE. [#12075](https://github.com/ClickHouse/ClickHouse/pull/12075) ([Azat Khuzhin](https://github.com/azat)). +* Fix unnecessary limiting the number of threads for selects from `VIEW`. Fixes [#11937](https://github.com/ClickHouse/ClickHouse/issues/11937). [#12085](https://github.com/ClickHouse/ClickHouse/pull/12085) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix empty `result_rows` and `result_bytes` metrics in `system.query_log` for selects. Fixes [#11595](https://github.com/ClickHouse/ClickHouse/issues/11595). [#12089](https://github.com/ClickHouse/ClickHouse/pull/12089) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix segfault with `-StateResample` combinators. [#12092](https://github.com/ClickHouse/ClickHouse/pull/12092) ([Anton Popov](https://github.com/CurtizJ)). +* Fix performance for selects with `UNION` caused by wrong limit for the total number of threads. Fixes [#12030](https://github.com/ClickHouse/ClickHouse/issues/12030). [#12103](https://github.com/ClickHouse/ClickHouse/pull/12103) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Format `Parquet` now properly works with `LowCardinality` and `LowCardinality(Nullable)` types. Fixes [#12086](https://github.com/ClickHouse/ClickHouse/issues/12086), [#8406](https://github.com/ClickHouse/ClickHouse/issues/8406). [#12108](https://github.com/ClickHouse/ClickHouse/pull/12108) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix handling dependency of table with ENGINE=Dictionary on dictionary. This fixes [#10994](https://github.com/ClickHouse/ClickHouse/issues/10994). This fixes [#10397](https://github.com/ClickHouse/ClickHouse/issues/10397). [#12116](https://github.com/ClickHouse/ClickHouse/pull/12116) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avoid "There is no query" exception for materialized views with joins or with subqueries attached to system logs (system.query_log, metric_log, etc) or to engine=Buffer underlying table. [#12120](https://github.com/ClickHouse/ClickHouse/pull/12120) ([filimonov](https://github.com/filimonov)). +* Fix bug which leads to incorrect table metadata in ZooKeeper for ReplicatedVersionedCollapsingMergeTree tables. Fixes [#12093](https://github.com/ClickHouse/ClickHouse/issues/12093). [#12121](https://github.com/ClickHouse/ClickHouse/pull/12121) ([alesapin](https://github.com/alesapin)). +* Normalize "pid" file handling. In previous versions the server may refuse to start if it was killed without proper shutdown and if there is another process that has the same pid as previously run server. Also pid file may be removed in unsuccessful server startup even if there is another server running. This fixes [#3501](https://github.com/ClickHouse/ClickHouse/issues/3501). [#12133](https://github.com/ClickHouse/ClickHouse/pull/12133) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential infinite loop in `greatCircleDistance`, `geoDistance`. This fixes [#12117](https://github.com/ClickHouse/ClickHouse/issues/12117). [#12137](https://github.com/ClickHouse/ClickHouse/pull/12137) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential overflow in integer division. This fixes [#12119](https://github.com/ClickHouse/ClickHouse/issues/12119). [#12140](https://github.com/ClickHouse/ClickHouse/pull/12140) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad code in redundant ORDER BY optimization. 
The bug was introduced in [#10067](https://github.com/ClickHouse/ClickHouse/issues/10067). [#12148](https://github.com/ClickHouse/ClickHouse/pull/12148) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix transform of query to send to external DBMS (e.g. MySQL, ODBC) in presence of aliases. This fixes [#12032](https://github.com/ClickHouse/ClickHouse/issues/12032). [#12151](https://github.com/ClickHouse/ClickHouse/pull/12151) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong logic in ALTER DELETE that leads to deleting of records when condition evaluates to NULL. This fixes [#9088](https://github.com/ClickHouse/ClickHouse/issues/9088). This closes [#12106](https://github.com/ClickHouse/ClickHouse/issues/12106). [#12153](https://github.com/ClickHouse/ClickHouse/pull/12153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't split the dictionary source's table name into schema and table name itself if ODBC connection doesn't support schema. [#12165](https://github.com/ClickHouse/ClickHouse/pull/12165) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed the behaviour when `SummingMergeTree` engine sums up columns from partition key. Added an exception in case of explicit definition of columns to sum which intersects with partition key columns. This fixes [#7867](https://github.com/ClickHouse/ClickHouse/issues/7867). [#12173](https://github.com/ClickHouse/ClickHouse/pull/12173) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix dictGet arguments check during GROUP BY injective functions elimination. [#12179](https://github.com/ClickHouse/ClickHouse/pull/12179) ([Azat Khuzhin](https://github.com/azat)). +* Cap max_memory_usage* limits to the process resident memory. [#12182](https://github.com/ClickHouse/ClickHouse/pull/12182) ([Azat Khuzhin](https://github.com/azat)). +* Fixed logical functions for UInt8 values when they are not equal to 0 or 1. 
[#12196](https://github.com/ClickHouse/ClickHouse/pull/12196) ([Alexander Kazakov](https://github.com/Akazz)). +* Fixed the behaviour when during multiple sequential inserts in `StorageFile` header for some special types was written more than once. This fixed [#6155](https://github.com/ClickHouse/ClickHouse/issues/6155). [#12197](https://github.com/ClickHouse/ClickHouse/pull/12197) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fixed behaviour on reaching redirect limit in request to S3 storage. [#12256](https://github.com/ClickHouse/ClickHouse/pull/12256) ([ianton-ru](https://github.com/ianton-ru)). +* Not for changelog. Cherry-pick after [#12196](https://github.com/ClickHouse/ClickHouse/issues/12196). [#12271](https://github.com/ClickHouse/ClickHouse/pull/12271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implement conversions to the common type for LowCardinality types. This allows to execute UNION ALL of tables with columns of LowCardinality and other columns. This fixes [#8212](https://github.com/ClickHouse/ClickHouse/issues/8212). This fixes [#4342](https://github.com/ClickHouse/ClickHouse/issues/4342). [#12275](https://github.com/ClickHouse/ClickHouse/pull/12275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `arrayFill` worked incorrectly for empty arrays that may lead to crash. This fixes [#12263](https://github.com/ClickHouse/ClickHouse/issues/12263). [#12279](https://github.com/ClickHouse/ClickHouse/pull/12279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Show error after TrieDictionary failed to load. [#12290](https://github.com/ClickHouse/ClickHouse/pull/12290) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix typo in setting name. [#12292](https://github.com/ClickHouse/ClickHouse/pull/12292) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some threads might randomly hang for a few seconds during DNS cache updating. It's fixed. 
[#12296](https://github.com/ClickHouse/ClickHouse/pull/12296) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix TTL after renaming column, on which depends TTL expression. [#12304](https://github.com/ClickHouse/ClickHouse/pull/12304) ([Anton Popov](https://github.com/CurtizJ)). +* Avoid "bad cast" exception when there is an expression that filters data by virtual columns (like `_table` in `Merge` tables) or by "index" columns in system tables such as filtering by database name when querying from `system.tables`, and this expression returns `Nullable` type. This fixes [#12166](https://github.com/ClickHouse/ClickHouse/issues/12166). [#12305](https://github.com/ClickHouse/ClickHouse/pull/12305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix order of columns in `WITH FILL` modifier. Previously order of columns of `ORDER BY` statement wasn't respected. [#12306](https://github.com/ClickHouse/ClickHouse/pull/12306) ([Anton Popov](https://github.com/CurtizJ)). +* When using codec `Delta` or `DoubleDelta` with non fixed width types, exception with code `LOGICAL_ERROR` was returned instead of exception with code `BAD_ARGUMENTS` (we ensure that exceptions with code logical error never happen). This fixes [#12110](https://github.com/ClickHouse/ClickHouse/issues/12110). [#12308](https://github.com/ClickHouse/ClickHouse/pull/12308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix very rare race condition in ReplicatedMergeTreeQueue. [#12315](https://github.com/ClickHouse/ClickHouse/pull/12315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error message and exit codes for `ALTER RENAME COLUMN` queries, when `RENAME` is not allowed. Fixes [#12301](https://github.com/ClickHouse/ClickHouse/issues/12301) and [#12303](https://github.com/ClickHouse/ClickHouse/issues/12303). [#12335](https://github.com/ClickHouse/ClickHouse/pull/12335) ([alesapin](https://github.com/alesapin)). 
+* Fix TOTALS/ROLLUP/CUBE for aggregate functions with `-State` and `Nullable` arguments. This fixes [#12163](https://github.com/ClickHouse/ClickHouse/issues/12163). [#12376](https://github.com/ClickHouse/ClickHouse/pull/12376) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to CLEAR column even if there are depending DEFAULT expressions. This fixes [#12333](https://github.com/ClickHouse/ClickHouse/issues/12333). [#12378](https://github.com/ClickHouse/ClickHouse/pull/12378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid exception when negative or floating point constant is used in WHERE condition for indexed tables. This fixes [#11905](https://github.com/ClickHouse/ClickHouse/issues/11905). [#12384](https://github.com/ClickHouse/ClickHouse/pull/12384) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Additional check for arguments of bloom filter index. This fixes [#11408](https://github.com/ClickHouse/ClickHouse/issues/11408). [#12388](https://github.com/ClickHouse/ClickHouse/pull/12388) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reverts change introduced in [#11079](https://github.com/ClickHouse/ClickHouse/issues/11079) to resolve [#12098](https://github.com/ClickHouse/ClickHouse/issues/12098). [#12397](https://github.com/ClickHouse/ClickHouse/pull/12397) ([Mike Kot](https://github.com/myrrc)). +* Fixed possible segfault if StorageMerge. Closes [#12054](https://github.com/ClickHouse/ClickHouse/issues/12054). [#12401](https://github.com/ClickHouse/ClickHouse/pull/12401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix overflow when very large LIMIT or OFFSET is specified. This fixes [#10470](https://github.com/ClickHouse/ClickHouse/issues/10470). This fixes [#11372](https://github.com/ClickHouse/ClickHouse/issues/11372). [#12427](https://github.com/ClickHouse/ClickHouse/pull/12427) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix UBSan report in base64 if tests were run on server with AVX-512. This fixes [#12318](https://github.com/ClickHouse/ClickHouse/issues/12318). Author: @qoega. [#12441](https://github.com/ClickHouse/ClickHouse/pull/12441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Add simple GitHub hook script for the serverless environment. [#11605](https://github.com/ClickHouse/ClickHouse/pull/11605) ([alesapin](https://github.com/alesapin)). +* Send logs to client on fatal errors if possible. This will make test results more readable. [#11826](https://github.com/ClickHouse/ClickHouse/pull/11826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow ClickHouse to run on Android. [#11894](https://github.com/ClickHouse/ClickHouse/pull/11894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Clean up unused header files from `Obfuscator.cpp` and `DatabaseAtomic.cpp`. [#11922](https://github.com/ClickHouse/ClickHouse/pull/11922) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix some typos in code. [#12003](https://github.com/ClickHouse/ClickHouse/pull/12003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Scripts for automated scheduled backporting based on PR labels. [#12029](https://github.com/ClickHouse/ClickHouse/pull/12029) ([Ivan](https://github.com/abyss7)). +* Add new type of tests based on Testflows framework. [#12090](https://github.com/ClickHouse/ClickHouse/pull/12090) ([vzakaznikov](https://github.com/vzakaznikov)). +* Install `ca-certificates` before the first `apt-get update` in Dockerfile. [#12095](https://github.com/ClickHouse/ClickHouse/pull/12095) ([Ivan Blinkov](https://github.com/blinkov)). +* Daily check by [GitHub CodeQL](https://securitylab.github.com/tools/codeql) security analysis tool that looks for [CWE](https://cwe.mitre.org/). [#12101](https://github.com/ClickHouse/ClickHouse/pull/12101) ([Ivan Blinkov](https://github.com/blinkov)). 
+* Regular check by [Anchore Container Analysis](https://docs.anchore.com) security analysis tool that looks for [CVE](https://cve.mitre.org/) in `clickhouse-server` Docker image. Also confirms that `Dockerfile` is buildable. Runs daily on `master` and on pull-requests to `Dockerfile`. [#12102](https://github.com/ClickHouse/ClickHouse/pull/12102) ([Ivan Blinkov](https://github.com/blinkov)). +* Add `UNBUNDLED` flag to `system.build_options` table. Move skip lists for `clickhouse-test` to clickhouse repo. [#12107](https://github.com/ClickHouse/ClickHouse/pull/12107) ([alesapin](https://github.com/alesapin)). +* Implement AST-based query fuzzing mode for clickhouse-client. See [this label](https://github.com/ClickHouse/ClickHouse/issues?q=label%3Afuzz+is%3Aissue) for the list of issues we recently found by fuzzing. Most of them were found by this tool, and a couple by SQLancer and `00746_sql_fuzzy.pl`. [#12111](https://github.com/ClickHouse/ClickHouse/pull/12111) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Perform an upgrade of system packages in the `clickhouse-server` docker image. [#12124](https://github.com/ClickHouse/ClickHouse/pull/12124) ([Ivan Blinkov](https://github.com/blinkov)). +* Added a showcase of the minimal Docker image without using any Linux distribution. [#12126](https://github.com/ClickHouse/ClickHouse/pull/12126) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor CMake fixes for UNBUNDLED build. [#12131](https://github.com/ClickHouse/ClickHouse/pull/12131) ([Matwey V. Kornilov](https://github.com/matwey)). +* Missed `<atomic>` is required for `std::atomic<>`. [#12134](https://github.com/ClickHouse/ClickHouse/pull/12134) ([Matwey V. Kornilov](https://github.com/matwey)). +* Fix warnings from CodeQL. `CodeQL` is another static analyzer that we will use along with `clang-tidy` and `PVS-Studio` that we use already. 
[#12138](https://github.com/ClickHouse/ClickHouse/pull/12138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Record additional detail on Dockerfile scan reports. [#12159](https://github.com/ClickHouse/ClickHouse/pull/12159) ([Ivan Blinkov](https://github.com/blinkov)). +* Place common docker compose files to integration docker container. [#12168](https://github.com/ClickHouse/ClickHouse/pull/12168) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove verbosity from the binary builds. [#12174](https://github.com/ClickHouse/ClickHouse/pull/12174) ([alesapin](https://github.com/alesapin)). +* Remove strange file creation during build in `orc`. [#12258](https://github.com/ClickHouse/ClickHouse/pull/12258) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now functional and stress tests will be able to run with old version of `clickhouse-test` script. [#12287](https://github.com/ClickHouse/ClickHouse/pull/12287) ([alesapin](https://github.com/alesapin)). +* Log sanitizer trap messages from separate thread. This will prevent possible deadlock under thread sanitizer. [#12313](https://github.com/ClickHouse/ClickHouse/pull/12313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added S3 HTTPS integration test. [#12412](https://github.com/ClickHouse/ClickHouse/pull/12412) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix UBSan report in HDFS library. This closes [#12330](https://github.com/ClickHouse/ClickHouse/issues/12330). [#12453](https://github.com/ClickHouse/ClickHouse/pull/12453) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Other +* Update word break characters to match readline default - all non-alphanumeric characters. ... [#11975](https://github.com/ClickHouse/ClickHouse/pull/11975) ([Andrew Onyshchuk](https://github.com/oandrew)). + +#### NO CL CATEGORY + +* * Not for changelog. [#12265](https://github.com/ClickHouse/ClickHouse/pull/12265) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* ... 
[#12431](https://github.com/ClickHouse/ClickHouse/pull/12431) ([Tom Bombadil](https://github.com/ithangzhou)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump certifi from 2020.4.5.2 to 2020.6.20 in /docs/tools/translate'. [#11853](https://github.com/ClickHouse/ClickHouse/pull/11853) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Merging [#10679](https://github.com/ClickHouse/ClickHouse/issues/10679)'. [#11896](https://github.com/ClickHouse/ClickHouse/pull/11896) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "[experiment] maybe fix warnings in integration tests"'. [#12011](https://github.com/ClickHouse/ClickHouse/pull/12011) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Bump idna from 2.9 to 2.10 in /docs/tools'. [#12024](https://github.com/ClickHouse/ClickHouse/pull/12024) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump python-slugify from 1.2.6 to 4.0.1 in /docs/tools'. [#12049](https://github.com/ClickHouse/ClickHouse/pull/12049) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). 
+ diff --git a/docs/changelogs/v20.6.10.2-stable.md b/docs/changelogs/v20.6.10.2-stable.md new file mode 100644 index 00000000000..489fd86ecf5 --- /dev/null +++ b/docs/changelogs/v20.6.10.2-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v20.6.10.2-stable FIXME as compared to v20.6.9.1-stable + diff --git a/docs/changelogs/v20.6.11.1-stable.md b/docs/changelogs/v20.6.11.1-stable.md new file mode 100644 index 00000000000..d00259e2248 --- /dev/null +++ b/docs/changelogs/v20.6.11.1-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v20.6.11.1-stable FIXME as compared to v20.6.10.2-stable + diff --git a/docs/changelogs/v20.6.2.15-prestable.md b/docs/changelogs/v20.6.2.15-prestable.md new file mode 100644 index 00000000000..c1ea3c7a665 --- /dev/null +++ b/docs/changelogs/v20.6.2.15-prestable.md @@ -0,0 +1,204 @@ +### ClickHouse release v20.6.2.15-prestable FIXME as compared to v20.5.1.3833-prestable + +#### Backward Incompatible Change +* `clickhouse-local` now uses an unique temporary data directory by default, not the current directory as before. If needed, the data directory can be explicitly specified with the `-- --path` option. [#11931](https://github.com/ClickHouse/ClickHouse/pull/11931) ([Alexander Kuzmenkov](https://github.com/akuzm)). + +#### New Feature +* Add `Alter table drop replica replica_name` support. This fixes [#7080](https://github.com/ClickHouse/ClickHouse/issues/7080). [#10679](https://github.com/ClickHouse/ClickHouse/pull/10679) ([sundyli](https://github.com/sundy-li)). +* Added new in-memory format of parts in `MergeTree`-family tables, which stores data in memory. Parts are written on disk at first merge. Part will be created in in-memory format if its size in rows or bytes is below thresholds `min_rows_for_compact_part` and `min_bytes_for_compact_part`. Also optional support of Write-Ahead-Log is available, which is enabled by default and is controlled by setting `in_memory_parts_enable_wal`. 
[#10697](https://github.com/ClickHouse/ClickHouse/pull/10697) ([Anton Popov](https://github.com/CurtizJ)). +* Add -Distinct combinator for aggregate functions. [#10930](https://github.com/ClickHouse/ClickHouse/pull/10930) ([Sofia Antipushina](https://github.com/Sonichka1311)). +* Support table engine mongo(host:port, database, collection, user, password). [#10931](https://github.com/ClickHouse/ClickHouse/pull/10931) ([ageraab](https://github.com/ageraab)). +* Add storage RabbitMQ. [#11069](https://github.com/ClickHouse/ClickHouse/pull/11069) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Opt-in settings to send crash reports to the ClickHouse core team via [Sentry](https://sentry.io). [#11300](https://github.com/ClickHouse/ClickHouse/pull/11300) ([Ivan Blinkov](https://github.com/blinkov)). +* Add ORCBlockOutputFormat. [#11662](https://github.com/ClickHouse/ClickHouse/pull/11662) ([Kruglov Pavel](https://github.com/Avogar)). +* `max_thread_pool_size` config for changing the maximum number of Threads in Global Thread Pool. [#11668](https://github.com/ClickHouse/ClickHouse/pull/11668) ([Bharat Nallan](https://github.com/bharatnc)). +* Initial implementation of `EXPLAIN` query. Syntax: `EXPLAIN SELECT ...`. This fixes [#1118](https://github.com/ClickHouse/ClickHouse/issues/1118). [#11873](https://github.com/ClickHouse/ClickHouse/pull/11873) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Switched paths in S3 metadata to relative which allows to handle S3 blobs more easily. [#11892](https://github.com/ClickHouse/ClickHouse/pull/11892) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Avro UUID input/output support. [#11954](https://github.com/ClickHouse/ClickHouse/pull/11954) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Added read-only MongoDB table engine. Allows reading flat (primitive types, not nested) fields. [#11983](https://github.com/ClickHouse/ClickHouse/pull/11983) ([alesapin](https://github.com/alesapin)). 
+* Add setting to fields not found in Avro schema. [#12007](https://github.com/ClickHouse/ClickHouse/pull/12007) ([Andrew Onyshchuk](https://github.com/oandrew)). +* add function `parseDateTimeBestEffortUS`. [#12028](https://github.com/ClickHouse/ClickHouse/pull/12028) ([flynn](https://github.com/ucasfl)). +* #4006 Support ALTER TABLE ... [ADD|MODIFY] COLUMN ... FIRST. [#12073](https://github.com/ClickHouse/ClickHouse/pull/12073) ([Winter Zhang](https://github.com/zhang2014)). +* Add a function initializedAggregation to initialize an aggregation based on a single value. [#12109](https://github.com/ClickHouse/ClickHouse/pull/12109) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Support RIGHT and FULL JOIN with `set join_algorithm=partial_merge`. Only ALL strictness is supported (ANY, SEMI, ANTI, ASOF are not). [#12118](https://github.com/ClickHouse/ClickHouse/pull/12118) ([Artem Zuikov](https://github.com/4ertus2)). +* Implementation of PostgreSQL-like ILIKE operator for [#11710](https://github.com/ClickHouse/ClickHouse/issues/11710). [#12125](https://github.com/ClickHouse/ClickHouse/pull/12125) ([Mike Kot](https://github.com/myrrc)). + +#### Performance Improvement +* Allow to use direct_io and mmap_io for secondary indices if the settings `min_bytes_to_use_direct_io` or `min_bytes_to_use_mmap_io` are configured. [#11955](https://github.com/ClickHouse/ClickHouse/pull/11955) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix estimation of the number of marks while reading from MergeTree. This is needed to correctly handle the settings `merge_tree_max_rows_to_use_cache`, `merge_tree_max_bytes_to_use_cache`, `merge_tree_min_rows_for_concurrent_read`, `merge_tree_min_bytes_for_concurrent_read`, `merge_tree_min_rows_for_seek`, `merge_tree_min_bytes_for_seek`. Now settings `min_bytes_to_use_mmap_io` also applied to read index and compact parts in MergeTree table engines family. 
[#11970](https://github.com/ClickHouse/ClickHouse/pull/11970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574) Index not used for IN operator with literals", performance regression introduced around v19.3. [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)). +* Remove injective functions inside `uniq*()` if `set optimize_injective_functions_inside_uniq=1`. [#12337](https://github.com/ClickHouse/ClickHouse/pull/12337) ([Artem Zuikov](https://github.com/4ertus2)). + +#### Improvement +* Add number of errors to ignore while choosing replicas (`distributed_replica_error_ignore`). [#11669](https://github.com/ClickHouse/ClickHouse/pull/11669) ([Azat Khuzhin](https://github.com/azat)). +* Improved performance of 'ORDER BY' and 'GROUP BY' by prefix of sorting key. [#11696](https://github.com/ClickHouse/ClickHouse/pull/11696) ([Anton Popov](https://github.com/CurtizJ)). +* - Add `optimize_skip_unused_shards_nesting` (allows control nesting level for shards skipping optimization) - Add `force_skip_optimize_shards_nesting` (allows control nesting level for checking was shards skipped or not) - Deprecate `force_optimize_skip_unused_shards_no_nested` (`force_skip_optimize_shards_nesting` should be used instead) - Disable `optimize_skip_unused_shards` if sharding_key has non-deterministic func (i.e. `rand()`, note that this does not changes anything for INSERT side). [#11715](https://github.com/ClickHouse/ClickHouse/pull/11715) ([Azat Khuzhin](https://github.com/azat)). +* Multiversion metadata for storages without structure locks. [#11745](https://github.com/ClickHouse/ClickHouse/pull/11745) ([alesapin](https://github.com/alesapin)). +* Slightly relax the validation of ODBC connection string. If the hostname or username contains only word characters along with `.` and `-`, don't put it into curly braces. 
It is needed, because some ODBC drivers (e.g. PostgreSQL) don't understand when hostname is enclosed in curly braces. [#11845](https://github.com/ClickHouse/ClickHouse/pull/11845) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support parse UUID without separator(separators are always removed in most implementations, this is helpful for users to write data). [#11856](https://github.com/ClickHouse/ClickHouse/pull/11856) ([Winter Zhang](https://github.com/zhang2014)). +* Support `SIGNED` and `UNSIGNED` modifiers of standard integer types (`BIGINT`, `INT`, ...) for compatibility with MySQL. [#11858](https://github.com/ClickHouse/ClickHouse/pull/11858) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow to use `sumWithOverflow` as `SimpleAggregateFunction`. Closes [#8053](https://github.com/ClickHouse/ClickHouse/issues/8053). [#11865](https://github.com/ClickHouse/ClickHouse/pull/11865) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add FixedString support in Hashing functions. [#11878](https://github.com/ClickHouse/ClickHouse/pull/11878) ([flynn](https://github.com/ucasfl)). +* Rewrite code for `optimize_arithmetic_operations_in_aggregate_functions` optimisation. [#11899](https://github.com/ClickHouse/ClickHouse/pull/11899) ([Artem Zuikov](https://github.com/4ertus2)). +* Improve path concatenation and fix double slashed paths using std::filesystem::path instead of std::string in `DatabaseOrdinary.cpp`. [#11900](https://github.com/ClickHouse/ClickHouse/pull/11900) ([Bharat Nallan](https://github.com/bharatnc)). +* Deprecate the old regular style and use the new globalVariable method [#11832](https://github.com/ClickHouse/ClickHouse/issues/11832). [#11901](https://github.com/ClickHouse/ClickHouse/pull/11901) ([BohuTANG](https://github.com/BohuTANG)). +* related to [issue 9797](https://github.com/ClickHouse/ClickHouse/issues/9797). [#11923](https://github.com/ClickHouse/ClickHouse/pull/11923) ([flynn](https://github.com/ucasfl)). 
+* `system.tables` now considers column capacities for Memory and Buffer table engines, which is better approximation for resident memory size. [#11935](https://github.com/ClickHouse/ClickHouse/pull/11935) ([Max Akhmedov](https://github.com/zlobober)). +* Add CPU frequencies to system.asynchronous_metrics. Make the metric collection period configurable. [#11972](https://github.com/ClickHouse/ClickHouse/pull/11972) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Allow to perform "metadata-only" ALTER of partition key such as adding more elements to Enum data type. This fixes [#7513](https://github.com/ClickHouse/ClickHouse/issues/7513). [#11973](https://github.com/ClickHouse/ClickHouse/pull/11973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add replica priority for load_balancing (for manual prioritization of the load balancing). [#11995](https://github.com/ClickHouse/ClickHouse/pull/11995) ([Azat Khuzhin](https://github.com/azat)). +* Support MySQL engine reading Enums type [#3985](https://github.com/ClickHouse/ClickHouse/issues/3985). [#11996](https://github.com/ClickHouse/ClickHouse/pull/11996) ([BohuTANG](https://github.com/BohuTANG)). +* Implemented single part uploads for DiskS3. [#12026](https://github.com/ClickHouse/ClickHouse/pull/12026) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Moved useless S3 logging to TRACE level. [#12067](https://github.com/ClickHouse/ClickHouse/pull/12067) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Improves `REVOKE` command: now it requires grant/admin option for only access which will be revoked. For example, to execute `REVOKE ALL ON *.* FROM user1` now it doesn't require to have full access rights granted with grant option. Added command `REVOKE ALL FROM user1` - it revokes all granted roles from `user1`. [#12083](https://github.com/ClickHouse/ClickHouse/pull/12083) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add 'type' column in system.disks. 
[#12115](https://github.com/ClickHouse/ClickHouse/pull/12115) ([ianton-ru](https://github.com/ianton-ru)). +* Added support for `%g` (two digit ISO year) and `%G` (four digit ISO year) substitutions in `formatDateTime` function. [#12136](https://github.com/ClickHouse/ClickHouse/pull/12136) ([vivarum](https://github.com/vivarum)). +* Add `KILL QUERY [connection_id]` for the MySQL client/driver to cancel the long query, issue [#12038](https://github.com/ClickHouse/ClickHouse/issues/12038). [#12152](https://github.com/ClickHouse/ClickHouse/pull/12152) ([BohuTANG](https://github.com/BohuTANG)). +* 1. Support MySQL 'SELECT DATABASE()' [#9336](https://github.com/ClickHouse/ClickHouse/issues/9336) 2. Add MySQL replacement query integration test. [#12314](https://github.com/ClickHouse/ClickHouse/pull/12314) ([BohuTANG](https://github.com/BohuTANG)). +* This setting allows to choose charset for printing grids (either utf8 or ascii). [#12372](https://github.com/ClickHouse/ClickHouse/pull/12372) ([Sabyanin Maxim](https://github.com/s-mx)). +* Write the detail exception message to the client instead of 'MySQL server has gone away'. [#12383](https://github.com/ClickHouse/ClickHouse/pull/12383) ([BohuTANG](https://github.com/BohuTANG)). +* lifetime_rows/lifetime_bytes for Buffer engine. [#12421](https://github.com/ClickHouse/ClickHouse/pull/12421) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix memory accounting via HTTP interface (can be significant with `wait_end_of_query=1`). [#11840](https://github.com/ClickHouse/ClickHouse/pull/11840) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed bug with no moves when changing storage policy from default one. [#11893](https://github.com/ClickHouse/ClickHouse/pull/11893) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix rare crash caused by using Nullable column in prewhere condition. Continuation of [#11869](https://github.com/ClickHouse/ClickHouse/issues/11869). [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Keep aliases for substitutions in query (parametrized queries). This fixes [#11914](https://github.com/ClickHouse/ClickHouse/issues/11914). [#11916](https://github.com/ClickHouse/ClickHouse/pull/11916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix uninitialized memory in partitions serialization. [#11919](https://github.com/ClickHouse/ClickHouse/pull/11919) ([alesapin](https://github.com/alesapin)). +* Use the correct current database for checking access rights after statement `USE database`. [#11920](https://github.com/ClickHouse/ClickHouse/pull/11920) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed `Query parameter was not set` in `Values` format. Fixes [#11918](https://github.com/ClickHouse/ClickHouse/issues/11918). [#11936](https://github.com/ClickHouse/ClickHouse/pull/11936) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race condition in extractAllGroups* functions. 
[#11949](https://github.com/ClickHouse/ClickHouse/pull/11949) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make mmap IO work again (experimental). Continuation of [#8520](https://github.com/ClickHouse/ClickHouse/issues/8520). [#11953](https://github.com/ClickHouse/ClickHouse/pull/11953) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong setting name in log message at server startup. [#11997](https://github.com/ClickHouse/ClickHouse/pull/11997) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Partial revokes work correctly in complex cases as well, for example. [#12002](https://github.com/ClickHouse/ClickHouse/pull/12002) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix potential floating point exception. This closes [#11378](https://github.com/ClickHouse/ClickHouse/issues/11378). [#12005](https://github.com/ClickHouse/ClickHouse/pull/12005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid returning wrong number of geohashes in function `geoHashesInBox` due to accumulation of floating point error. This fixes [#11369](https://github.com/ClickHouse/ClickHouse/issues/11369). [#12006](https://github.com/ClickHouse/ClickHouse/pull/12006) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential array size overflow in generateRandom that may lead to crash. This fixes [#11371](https://github.com/ClickHouse/ClickHouse/issues/11371). [#12013](https://github.com/ClickHouse/ClickHouse/pull/12013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix calculation of access rights when allow_ddl=0. [#12015](https://github.com/ClickHouse/ClickHouse/pull/12015) ([Vitaly Baranov](https://github.com/vitlibar)). +* When adding floating point number of intervals to date/datetime, the result may be calculated incorrectly. This fixes [#11377](https://github.com/ClickHouse/ClickHouse/issues/11377). 
[#12018](https://github.com/ClickHouse/ClickHouse/pull/12018) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* A query with function `neighbor` as the only returned expression may return empty result if the function is called with offset `-9223372036854775808`. This fixes [#11367](https://github.com/ClickHouse/ClickHouse/issues/11367). [#12019](https://github.com/ClickHouse/ClickHouse/pull/12019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not mention in changelog, because the bug did not come to release. Fix potential crash when doing ORDER BY multiple columns with specified COLLATE on one of the column when this column is constant. This fixes [#11379](https://github.com/ClickHouse/ClickHouse/issues/11379). The bug was introduced in [#11006](https://github.com/ClickHouse/ClickHouse/issues/11006) in version 20.5. [#12020](https://github.com/ClickHouse/ClickHouse/pull/12020) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result and potential crash when invoking function `if` with arguments of type `FixedString` with different sizes. This fixes [#11362](https://github.com/ClickHouse/ClickHouse/issues/11362). [#12021](https://github.com/ClickHouse/ClickHouse/pull/12021) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix calculation of access rights when allow_introspection_functions=0. [#12031](https://github.com/ClickHouse/ClickHouse/pull/12031) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash in JOIN with LowCardinality type with `join_algorithm=partial_merge`. [#12035](https://github.com/ClickHouse/ClickHouse/pull/12035) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix incorrect comparison of tuples with `Nullable` columns. Fixes [#11985](https://github.com/ClickHouse/ClickHouse/issues/11985). [#12039](https://github.com/ClickHouse/ClickHouse/pull/12039) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix constraints check if constraint is a constant expression. 
This fixes [#11360](https://github.com/ClickHouse/ClickHouse/issues/11360). [#12042](https://github.com/ClickHouse/ClickHouse/pull/12042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make `topK` aggregate function return Enum for Enum types. This fixes [#3740](https://github.com/ClickHouse/ClickHouse/issues/3740). [#12043](https://github.com/ClickHouse/ClickHouse/pull/12043) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parse tables metadata in parallel when loading database. This fixes slow server startup when there are large number of tables. [#12045](https://github.com/ClickHouse/ClickHouse/pull/12045) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error `Cannot capture column` for higher-order functions with `Tuple(LowCardinality)` argument. Fixes [#9766](https://github.com/ClickHouse/ClickHouse/issues/9766). [#12055](https://github.com/ClickHouse/ClickHouse/pull/12055) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Expected single dictionary argument for function` for function `defaultValueOfArgumentType` with `LowCardinality` type. Fixes [#11808](https://github.com/ClickHouse/ClickHouse/issues/11808). [#12056](https://github.com/ClickHouse/ClickHouse/pull/12056) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible crash while using wrong type for `PREWHERE`. Fixes [#12053](https://github.com/ClickHouse/ClickHouse/issues/12053), [#12060](https://github.com/ClickHouse/ClickHouse/issues/12060). [#12060](https://github.com/ClickHouse/ClickHouse/pull/12060) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV in StorageKafka on DROP TABLE. [#12075](https://github.com/ClickHouse/ClickHouse/pull/12075) ([Azat Khuzhin](https://github.com/azat)). +* Fix unnecessary limiting the number of threads for selects from `VIEW`. Fixes [#11937](https://github.com/ClickHouse/ClickHouse/issues/11937). 
[#12085](https://github.com/ClickHouse/ClickHouse/pull/12085) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix empty `result_rows` and `result_bytes` metrics in `system.query_log` for selects. Fixes [#11595](https://github.com/ClickHouse/ClickHouse/issues/11595). [#12089](https://github.com/ClickHouse/ClickHouse/pull/12089) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix segfault with `-StateResample` combinators. [#12092](https://github.com/ClickHouse/ClickHouse/pull/12092) ([Anton Popov](https://github.com/CurtizJ)). +* Fix performance for selects with `UNION` caused by wrong limit for the total number of threads. Fixes [#12030](https://github.com/ClickHouse/ClickHouse/issues/12030). [#12103](https://github.com/ClickHouse/ClickHouse/pull/12103) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Format `Parquet` now properly works with `LowCardinality` and `LowCardinality(Nullable)` types. Fixes [#12086](https://github.com/ClickHouse/ClickHouse/issues/12086), [#8406](https://github.com/ClickHouse/ClickHouse/issues/8406). [#12108](https://github.com/ClickHouse/ClickHouse/pull/12108) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix handling dependency of table with ENGINE=Dictionary on dictionary. This fixes [#10994](https://github.com/ClickHouse/ClickHouse/issues/10994). This fixes [#10397](https://github.com/ClickHouse/ClickHouse/issues/10397). [#12116](https://github.com/ClickHouse/ClickHouse/pull/12116) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avoid "There is no query" exception for materialized views with joins or with subqueries attached to system logs (system.query_log, metric_log, etc) or to engine=Buffer underlying table. [#12120](https://github.com/ClickHouse/ClickHouse/pull/12120) ([filimonov](https://github.com/filimonov)). +* Fix bug which leads to incorrect table metadata in ZooKeeper for ReplicatedVersionedCollapsingMergeTree tables.
Fixes [#12093](https://github.com/ClickHouse/ClickHouse/issues/12093). [#12121](https://github.com/ClickHouse/ClickHouse/pull/12121) ([alesapin](https://github.com/alesapin)). +* Normalize "pid" file handling. In previous versions the server may refuse to start if it was killed without proper shutdown and if there is another process that has the same pid as the previously run server. Also pid file may be removed in unsuccessful server startup even if there is another server running. This fixes [#3501](https://github.com/ClickHouse/ClickHouse/issues/3501). [#12133](https://github.com/ClickHouse/ClickHouse/pull/12133) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential infinite loop in `greatCircleDistance`, `geoDistance`. This fixes [#12117](https://github.com/ClickHouse/ClickHouse/issues/12117). [#12137](https://github.com/ClickHouse/ClickHouse/pull/12137) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potential overflow in integer division. This fixes [#12119](https://github.com/ClickHouse/ClickHouse/issues/12119). [#12140](https://github.com/ClickHouse/ClickHouse/pull/12140) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad code in redundant ORDER BY optimization. The bug was introduced in [#10067](https://github.com/ClickHouse/ClickHouse/issues/10067). [#12148](https://github.com/ClickHouse/ClickHouse/pull/12148) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix transform of query to send to external DBMS (e.g. MySQL, ODBC) in presence of aliases. This fixes [#12032](https://github.com/ClickHouse/ClickHouse/issues/12032). [#12151](https://github.com/ClickHouse/ClickHouse/pull/12151) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong logic in ALTER DELETE that leads to deleting of records when condition evaluates to NULL. This fixes [#9088](https://github.com/ClickHouse/ClickHouse/issues/9088).
This closes [#12106](https://github.com/ClickHouse/ClickHouse/issues/12106). [#12153](https://github.com/ClickHouse/ClickHouse/pull/12153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't split the dictionary source's table name into schema and table name itself if ODBC connection doesn't support schema. [#12165](https://github.com/ClickHouse/ClickHouse/pull/12165) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed the behaviour when `SummingMergeTree` engine sums up columns from partition key. Added an exception in case of explicit definition of columns to sum which intersects with partition key columns. This fixes [#7867](https://github.com/ClickHouse/ClickHouse/issues/7867). [#12173](https://github.com/ClickHouse/ClickHouse/pull/12173) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix dictGet arguments check during GROUP BY injective functions elimination. [#12179](https://github.com/ClickHouse/ClickHouse/pull/12179) ([Azat Khuzhin](https://github.com/azat)). +* Cap max_memory_usage* limits to the process resident memory. [#12182](https://github.com/ClickHouse/ClickHouse/pull/12182) ([Azat Khuzhin](https://github.com/azat)). +* Fixed logical functions for UInt8 values when they are not equal to 0 or 1. [#12196](https://github.com/ClickHouse/ClickHouse/pull/12196) ([Alexander Kazakov](https://github.com/Akazz)). +* Fixed the behaviour when during multiple sequential inserts in `StorageFile` header for some special types was written more than once. This fixed [#6155](https://github.com/ClickHouse/ClickHouse/issues/6155). [#12197](https://github.com/ClickHouse/ClickHouse/pull/12197) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fixed behaviour on reaching redirect limit in request to S3 storage. [#12256](https://github.com/ClickHouse/ClickHouse/pull/12256) ([ianton-ru](https://github.com/ianton-ru)). +* Not for changelog. Cherry-pick after [#12196](https://github.com/ClickHouse/ClickHouse/issues/12196). 
[#12271](https://github.com/ClickHouse/ClickHouse/pull/12271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implement conversions to the common type for LowCardinality types. This allows to execute UNION ALL of tables with columns of LowCardinality and other columns. This fixes [#8212](https://github.com/ClickHouse/ClickHouse/issues/8212). This fixes [#4342](https://github.com/ClickHouse/ClickHouse/issues/4342). [#12275](https://github.com/ClickHouse/ClickHouse/pull/12275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `arrayFill` worked incorrectly for empty arrays that may lead to crash. This fixes [#12263](https://github.com/ClickHouse/ClickHouse/issues/12263). [#12279](https://github.com/ClickHouse/ClickHouse/pull/12279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Show error after TrieDictionary failed to load. [#12290](https://github.com/ClickHouse/ClickHouse/pull/12290) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix typo in setting name. [#12292](https://github.com/ClickHouse/ClickHouse/pull/12292) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some threads might randomly hang for a few seconds during DNS cache updating. It's fixed. [#12296](https://github.com/ClickHouse/ClickHouse/pull/12296) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#12724](https://github.com/ClickHouse/ClickHouse/issues/12724): kafka: fix SIGSEGV if there is a message with error in the middle of the batch. [#12302](https://github.com/ClickHouse/ClickHouse/pull/12302) ([Azat Khuzhin](https://github.com/azat)). +* Fix TTL after renaming column, on which the TTL expression depends. [#12304](https://github.com/ClickHouse/ClickHouse/pull/12304) ([Anton Popov](https://github.com/CurtizJ)).
+* Avoid "bad cast" exception when there is an expression that filters data by virtual columns (like `_table` in `Merge` tables) or by "index" columns in system tables such as filtering by database name when querying from `system.tables`, and this expression returns `Nullable` type. This fixes [#12166](https://github.com/ClickHouse/ClickHouse/issues/12166). [#12305](https://github.com/ClickHouse/ClickHouse/pull/12305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix order of columns in `WITH FILL` modifier. Previously order of columns of `ORDER BY` statement wasn't respected. [#12306](https://github.com/ClickHouse/ClickHouse/pull/12306) ([Anton Popov](https://github.com/CurtizJ)). +* When using codec `Delta` or `DoubleDelta` with non fixed width types, exception with code `LOGICAL_ERROR` was returned instead of exception with code `BAD_ARGUMENTS` (we ensure that exceptions with code logical error never happen). This fixes [#12110](https://github.com/ClickHouse/ClickHouse/issues/12110). [#12308](https://github.com/ClickHouse/ClickHouse/pull/12308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix very rare race condition in ReplicatedMergeTreeQueue. [#12315](https://github.com/ClickHouse/ClickHouse/pull/12315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error message and exit codes for `ALTER RENAME COLUMN` queries, when `RENAME` is not allowed. Fixes [#12301](https://github.com/ClickHouse/ClickHouse/issues/12301) and [#12303](https://github.com/ClickHouse/ClickHouse/issues/12303). [#12335](https://github.com/ClickHouse/ClickHouse/pull/12335) ([alesapin](https://github.com/alesapin)). +* Fix TOTALS/ROLLUP/CUBE for aggregate functions with `-State` and `Nullable` arguments. This fixes [#12163](https://github.com/ClickHouse/ClickHouse/issues/12163). [#12376](https://github.com/ClickHouse/ClickHouse/pull/12376) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Allow to CLEAR column even if there are depending DEFAULT expressions. This fixes [#12333](https://github.com/ClickHouse/ClickHouse/issues/12333). [#12378](https://github.com/ClickHouse/ClickHouse/pull/12378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12723](https://github.com/ClickHouse/ClickHouse/issues/12723): If MergeTree table does not contain ORDER BY or PARTITION BY, it was possible to request ALTER to CLEAR all the columns and ALTER would get stuck. Fixed [#7941](https://github.com/ClickHouse/ClickHouse/issues/7941). [#12382](https://github.com/ClickHouse/ClickHouse/pull/12382) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid exception when negative or floating point constant is used in WHERE condition for indexed tables. This fixes [#11905](https://github.com/ClickHouse/ClickHouse/issues/11905). [#12384](https://github.com/ClickHouse/ClickHouse/pull/12384) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Additional check for arguments of bloom filter index. This fixes [#11408](https://github.com/ClickHouse/ClickHouse/issues/11408). [#12388](https://github.com/ClickHouse/ClickHouse/pull/12388) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reverts change introduced in [#11079](https://github.com/ClickHouse/ClickHouse/issues/11079) to resolve [#12098](https://github.com/ClickHouse/ClickHouse/issues/12098). [#12397](https://github.com/ClickHouse/ClickHouse/pull/12397) ([Mike Kot](https://github.com/myrrc)). +* Fixed possible segfault in StorageMerge. Closes [#12054](https://github.com/ClickHouse/ClickHouse/issues/12054). [#12401](https://github.com/ClickHouse/ClickHouse/pull/12401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#12725](https://github.com/ClickHouse/ClickHouse/issues/12725): Fix crash in JOIN with dictionary when we are joining over expression of dictionary key: `t JOIN dict ON expr(dict.id) = t.id`.
Disable dictionary join optimisation for this case. [#12458](https://github.com/ClickHouse/ClickHouse/pull/12458) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#12803](https://github.com/ClickHouse/ClickHouse/issues/12803): Fix SETTINGS parse after FORMAT. [#12480](https://github.com/ClickHouse/ClickHouse/pull/12480) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12862](https://github.com/ClickHouse/ClickHouse/issues/12862): Fixed performance issue, while reading from compact parts. [#12492](https://github.com/ClickHouse/ClickHouse/pull/12492) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#12722](https://github.com/ClickHouse/ClickHouse/issues/12722): Fixing race condition in live view tables which could cause data duplication. [#12519](https://github.com/ClickHouse/ClickHouse/pull/12519) ([vzakaznikov](https://github.com/vzakaznikov)). +* Backported in [#12721](https://github.com/ClickHouse/ClickHouse/issues/12721): Now ClickHouse will recalculate checksums for parts when file `checksums.txt` is absent. Broken since [#9827](https://github.com/ClickHouse/ClickHouse/issues/9827). [#12545](https://github.com/ClickHouse/ClickHouse/pull/12545) ([alesapin](https://github.com/alesapin)). +* Backported in [#12695](https://github.com/ClickHouse/ClickHouse/issues/12695): Fix error `Output of TreeExecutor is not sorted` for `OPTIMIZE DEDUPLICATE`. Fixes [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572). [#12613](https://github.com/ClickHouse/ClickHouse/pull/12613) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#12699](https://github.com/ClickHouse/ClickHouse/issues/12699): Fix possible `Pipeline stuck` error for queries with external sorting. Fixes [#12617](https://github.com/ClickHouse/ClickHouse/issues/12617). [#12618](https://github.com/ClickHouse/ClickHouse/pull/12618) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#12720](https://github.com/ClickHouse/ClickHouse/issues/12720): Fix error message about adaptive granularity. [#12624](https://github.com/ClickHouse/ClickHouse/pull/12624) ([alesapin](https://github.com/alesapin)). +* Backported in [#12696](https://github.com/ClickHouse/ClickHouse/issues/12696): Better exception message in disk access storage. [#12625](https://github.com/ClickHouse/ClickHouse/pull/12625) ([alesapin](https://github.com/alesapin)). +* Backported in [#12698](https://github.com/ClickHouse/ClickHouse/issues/12698): Exception `There is no supertype...` can be thrown during `ALTER ... UPDATE` in unexpected cases (e.g. when subtracting from UInt64 column). This fixes [#7306](https://github.com/ClickHouse/ClickHouse/issues/7306). This fixes [#4165](https://github.com/ClickHouse/ClickHouse/issues/4165). [#12633](https://github.com/ClickHouse/ClickHouse/pull/12633) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12697](https://github.com/ClickHouse/ClickHouse/issues/12697): Add support for function `if` with `Array(UUID)` arguments. This fixes [#11066](https://github.com/ClickHouse/ClickHouse/issues/11066). [#12648](https://github.com/ClickHouse/ClickHouse/pull/12648) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#12971](https://github.com/ClickHouse/ClickHouse/issues/12971): Fix SIGSEGV in StorageKafka when broker is unavailable (and not only). [#12658](https://github.com/ClickHouse/ClickHouse/pull/12658) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#12858](https://github.com/ClickHouse/ClickHouse/issues/12858): fixes [#10572](https://github.com/ClickHouse/ClickHouse/issues/10572) fix bloom filter index with const expression. [#12659](https://github.com/ClickHouse/ClickHouse/pull/12659) ([Winter Zhang](https://github.com/zhang2014)). 
+* Backported in [#12868](https://github.com/ClickHouse/ClickHouse/issues/12868): fixes [#12293](https://github.com/ClickHouse/ClickHouse/issues/12293) allow push predicate when subquery contains with clause. [#12663](https://github.com/ClickHouse/ClickHouse/pull/12663) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#12994](https://github.com/ClickHouse/ClickHouse/issues/12994): Fix optimization `optimize_move_functions_out_of_any=1` in case of `any(func())`. [#12664](https://github.com/ClickHouse/ClickHouse/pull/12664) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#12864](https://github.com/ClickHouse/ClickHouse/issues/12864): Fix memory tracking for input_format_parallel_parsing (by attaching thread to group). [#12672](https://github.com/ClickHouse/ClickHouse/pull/12672) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13187](https://github.com/ClickHouse/ClickHouse/issues/13187): Fix performance with large tuples, which are interpreted as functions in `IN` section. The case when user write `WHERE x IN tuple(1, 2, ...)` instead of `WHERE x IN (1, 2, ...)` for some obscure reason. [#12700](https://github.com/ClickHouse/ClickHouse/pull/12700) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13095](https://github.com/ClickHouse/ClickHouse/issues/13095): Fix CAST(Nullable(String), Enum()). [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13011](https://github.com/ClickHouse/ClickHouse/issues/13011): Fix rare bug when `ALTER DELETE` and `ALTER MODIFY COLUMN` queries executed simultaneously as a single mutation. Bug leads to an incorrect amount of rows in `count.txt` and as a consequence incorrect data in part. Also, fix a small bug with simultaneous `ALTER RENAME COLUMN` and `ALTER ADD COLUMN`. [#12760](https://github.com/ClickHouse/ClickHouse/pull/12760) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#13031](https://github.com/ClickHouse/ClickHouse/issues/13031): Corrected merge_with_ttl_timeout logic which did not work well when expiration affected more than one partition over one time interval. (Authored by @excitoon). [#12982](https://github.com/ClickHouse/ClickHouse/pull/12982) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#13049](https://github.com/ClickHouse/ClickHouse/issues/13049): Fix `Block structure mismatch` error for queries with `UNION` and `JOIN`. Fixes [#12602](https://github.com/ClickHouse/ClickHouse/issues/12602). [#12989](https://github.com/ClickHouse/ClickHouse/pull/12989) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13051](https://github.com/ClickHouse/ClickHouse/issues/13051): Fix crash which was possible for queries with `ORDER BY` tuple and small `LIMIT`. Fixes [#12623](https://github.com/ClickHouse/ClickHouse/issues/12623). [#13009](https://github.com/ClickHouse/ClickHouse/pull/13009) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13080](https://github.com/ClickHouse/ClickHouse/issues/13080): Add logging when the mutation is not running because of limited disk space or free threads in the background pool. [#13068](https://github.com/ClickHouse/ClickHouse/pull/13068) ([alesapin](https://github.com/alesapin)). +* Backported in [#13184](https://github.com/ClickHouse/ClickHouse/issues/13184): Fix error `Cannot convert column because it is constant but values of constants are different in source and result` for remote queries which use deterministic functions in scope of query, but not deterministic between queries, like `now()`, `now64()`, `randConstant()`. Fixes [#11327](https://github.com/ClickHouse/ClickHouse/issues/11327). [#13075](https://github.com/ClickHouse/ClickHouse/pull/13075) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#13146](https://github.com/ClickHouse/ClickHouse/issues/13146): Fix wrong index analysis with functions. It could lead to pruning wrong parts, while reading from `MergeTree` tables. Fixes [#13060](https://github.com/ClickHouse/ClickHouse/issues/13060). Fixes [#12406](https://github.com/ClickHouse/ClickHouse/issues/12406). [#13081](https://github.com/ClickHouse/ClickHouse/pull/13081) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement +* Add simple GitHub hook script for the serverless environment. [#11605](https://github.com/ClickHouse/ClickHouse/pull/11605) ([alesapin](https://github.com/alesapin)). +* Send logs to client on fatal errors if possible. This will make test results more readable. [#11826](https://github.com/ClickHouse/ClickHouse/pull/11826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow ClickHouse to run on Android. [#11894](https://github.com/ClickHouse/ClickHouse/pull/11894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Clean up unused header files from `Obfuscator.cpp` and `DatabaseAtomic.cpp`. [#11922](https://github.com/ClickHouse/ClickHouse/pull/11922) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix some typos in code. [#12003](https://github.com/ClickHouse/ClickHouse/pull/12003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Scripts for automated scheduled backporting based on PR labels. [#12029](https://github.com/ClickHouse/ClickHouse/pull/12029) ([Ivan](https://github.com/abyss7)). +* Add new type of tests based on Testflows framework. [#12090](https://github.com/ClickHouse/ClickHouse/pull/12090) ([vzakaznikov](https://github.com/vzakaznikov)). +* Install `ca-certificates` before the first `apt-get update` in Dockerfile. [#12095](https://github.com/ClickHouse/ClickHouse/pull/12095) ([Ivan Blinkov](https://github.com/blinkov)). 
+* Daily check by [GitHub CodeQL](https://securitylab.github.com/tools/codeql) security analysis tool that looks for [CWE](https://cwe.mitre.org/). [#12101](https://github.com/ClickHouse/ClickHouse/pull/12101) ([Ivan Blinkov](https://github.com/blinkov)). +* Regular check by [Anchore Container Analysis](https://docs.anchore.com) security analysis tool that looks for [CVE](https://cve.mitre.org/) in `clickhouse-server` Docker image. Also confirms that `Dockerfile` is buildable. Runs daily on `master` and on pull-requests to `Dockerfile`. [#12102](https://github.com/ClickHouse/ClickHouse/pull/12102) ([Ivan Blinkov](https://github.com/blinkov)). +* Add `UNBUNDLED` flag to `system.build_options` table. Move skip lists for `clickhouse-test` to clickhouse repo. [#12107](https://github.com/ClickHouse/ClickHouse/pull/12107) ([alesapin](https://github.com/alesapin)). +* Implement AST-based query fuzzing mode for clickhouse-client. See [this label](https://github.com/ClickHouse/ClickHouse/issues?q=label%3Afuzz+is%3Aissue) for the list of issues we recently found by fuzzing. Most of them were found by this tool, and a couple by SQLancer and `00746_sql_fuzzy.pl`. [#12111](https://github.com/ClickHouse/ClickHouse/pull/12111) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Perform an upgrade of system packages in the `clickhouse-server` docker image. [#12124](https://github.com/ClickHouse/ClickHouse/pull/12124) ([Ivan Blinkov](https://github.com/blinkov)). +* Added a showcase of the minimal Docker image without using any Linux distribution. [#12126](https://github.com/ClickHouse/ClickHouse/pull/12126) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor CMake fixes for UNBUNDLED build. [#12131](https://github.com/ClickHouse/ClickHouse/pull/12131) ([Matwey V. Kornilov](https://github.com/matwey)). +* Missed `<atomic>` is required for `std::atomic<>`. [#12134](https://github.com/ClickHouse/ClickHouse/pull/12134) ([Matwey V. Kornilov](https://github.com/matwey)).
+* Fix warnings from CodeQL. `CodeQL` is another static analyzer that we will use along with `clang-tidy` and `PVS-Studio` that we use already. [#12138](https://github.com/ClickHouse/ClickHouse/pull/12138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Record additional detail on Dockerfile scan reports. [#12159](https://github.com/ClickHouse/ClickHouse/pull/12159) ([Ivan Blinkov](https://github.com/blinkov)). +* Place common docker compose files to integration docker container. [#12168](https://github.com/ClickHouse/ClickHouse/pull/12168) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove verbosity from the binary builds. [#12174](https://github.com/ClickHouse/ClickHouse/pull/12174) ([alesapin](https://github.com/alesapin)). +* Remove strange file creation during build in `orc`. [#12258](https://github.com/ClickHouse/ClickHouse/pull/12258) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now functional and stress tests will be able to run with old version of `clickhouse-test` script. [#12287](https://github.com/ClickHouse/ClickHouse/pull/12287) ([alesapin](https://github.com/alesapin)). +* Log sanitizer trap messages from separate thread. This will prevent possible deadlock under thread sanitizer. [#12313](https://github.com/ClickHouse/ClickHouse/pull/12313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added S3 HTTPS integration test. [#12412](https://github.com/ClickHouse/ClickHouse/pull/12412) ([Pavel Kovalenko](https://github.com/Jokser)). + +#### Other +* Update word break characters to match readline default - all non-alphanumeric characters. ... [#11975](https://github.com/ClickHouse/ClickHouse/pull/11975) ([Andrew Onyshchuk](https://github.com/oandrew)). + +#### NO CL CATEGORY + +* * Not for changelog. [#12265](https://github.com/ClickHouse/ClickHouse/pull/12265) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* ... 
[#12431](https://github.com/ClickHouse/ClickHouse/pull/12431) ([Tom Bombadil](https://github.com/ithangzhou)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump certifi from 2020.4.5.2 to 2020.6.20 in /docs/tools/translate'. [#11853](https://github.com/ClickHouse/ClickHouse/pull/11853) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Merging [#10679](https://github.com/ClickHouse/ClickHouse/issues/10679)'. [#11896](https://github.com/ClickHouse/ClickHouse/pull/11896) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "[experiment] maybe fix warnings in integration tests"'. [#12011](https://github.com/ClickHouse/ClickHouse/pull/12011) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Bump idna from 2.9 to 2.10 in /docs/tools'. [#12024](https://github.com/ClickHouse/ClickHouse/pull/12024) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump python-slugify from 1.2.6 to 4.0.1 in /docs/tools'. [#12049](https://github.com/ClickHouse/ClickHouse/pull/12049) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.6.3.28-stable.md b/docs/changelogs/v20.6.3.28-stable.md new file mode 100644 index 00000000000..c8ca7db9bd8 --- /dev/null +++ b/docs/changelogs/v20.6.3.28-stable.md @@ -0,0 +1,17 @@ +### ClickHouse release v20.6.3.28-stable FIXME as compared to v20.6.2.15-prestable + +#### Bug Fix +* Backported in [#13301](https://github.com/ClickHouse/ClickHouse/issues/13301): The function `groupArrayMoving*` was not working for distributed queries. Its result was calculated within an incorrect data type (without promotion to the largest type). The function `groupArrayMovingAvg` was returning integer number that was inconsistent with the `avg` function. This fixes [#12568](https://github.com/ClickHouse/ClickHouse/issues/12568).
[#12622](https://github.com/ClickHouse/ClickHouse/pull/12622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13029](https://github.com/ClickHouse/ClickHouse/issues/13029): CREATE USER IF NOT EXISTS now doesn't throw exception if the user exists. This fixes [#12507](https://github.com/ClickHouse/ClickHouse/issues/12507). [#12646](https://github.com/ClickHouse/ClickHouse/pull/12646) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#12983](https://github.com/ClickHouse/ClickHouse/issues/12983): Fix columns duplication for range hashed dictionary created from DDL query. This fixes [#10605](https://github.com/ClickHouse/ClickHouse/issues/10605). [#12857](https://github.com/ClickHouse/ClickHouse/pull/12857) ([alesapin](https://github.com/alesapin)). +* Backported in [#13222](https://github.com/ClickHouse/ClickHouse/issues/13222): Fix DateTime64 conversion functions with constant argument. [#13205](https://github.com/ClickHouse/ClickHouse/pull/13205) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13245](https://github.com/ClickHouse/ClickHouse/issues/13245): Fix assert in `arrayElement` function in case of array elements are Nullable and array subscript is also Nullable. This fixes [#12172](https://github.com/ClickHouse/ClickHouse/issues/12172). [#13224](https://github.com/ClickHouse/ClickHouse/pull/13224) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13267](https://github.com/ClickHouse/ClickHouse/issues/13267): Fix function if with nullable constexpr as cond that is not literal NULL. Fixes [#12463](https://github.com/ClickHouse/ClickHouse/issues/12463). [#13226](https://github.com/ClickHouse/ClickHouse/pull/13226) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#13298](https://github.com/ClickHouse/ClickHouse/issues/13298): Fix potentially low performance and slightly incorrect result for `uniqExact`, `topK`, `sumDistinct` and similar aggregate functions called on Float types with NaN values. It also triggered assert in debug build. This fixes [#12491](https://github.com/ClickHouse/ClickHouse/issues/12491). [#13254](https://github.com/ClickHouse/ClickHouse/pull/13254) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13316](https://github.com/ClickHouse/ClickHouse/issues/13316): The server may crash if user passed specifically crafted arguments to the function `h3ToChildren`. This fixes [#13275](https://github.com/ClickHouse/ClickHouse/issues/13275). [#13277](https://github.com/ClickHouse/ClickHouse/pull/13277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13329](https://github.com/ClickHouse/ClickHouse/issues/13329): Fix possible error `Totals having transform was already added to pipeline` in case of a query from delayed replica. [#13290](https://github.com/ClickHouse/ClickHouse/pull/13290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13317](https://github.com/ClickHouse/ClickHouse/issues/13317): Fix crash in `LEFT ASOF JOIN` with `join_use_nulls=1`. [#13291](https://github.com/ClickHouse/ClickHouse/pull/13291) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#13375](https://github.com/ClickHouse/ClickHouse/issues/13375): Fix segfault when function `groupArrayMovingSum` deserializes empty state. Fixes [#13339](https://github.com/ClickHouse/ClickHouse/issues/13339). [#13341](https://github.com/ClickHouse/ClickHouse/pull/13341) ([alesapin](https://github.com/alesapin)). +* Backported in [#13432](https://github.com/ClickHouse/ClickHouse/issues/13432): Fix PrettyCompactMonoBlock for clickhouse-local. Fix extremes/totals with PrettyCompactMonoBlock. 
Fixes [#7746](https://github.com/ClickHouse/ClickHouse/issues/7746). [#13394](https://github.com/ClickHouse/ClickHouse/pull/13394) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13443](https://github.com/ClickHouse/ClickHouse/issues/13443): Fix `aggregate function any(x) is found inside another aggregate function in query` error with `SET optimize_move_functions_out_of_any = 1` and aliases inside `any()`. [#13419](https://github.com/ClickHouse/ClickHouse/pull/13419) ([Artem Zuikov](https://github.com/4ertus2)). + diff --git a/docs/changelogs/v20.6.4.44-stable.md b/docs/changelogs/v20.6.4.44-stable.md new file mode 100644 index 00000000000..828f95f5144 --- /dev/null +++ b/docs/changelogs/v20.6.4.44-stable.md @@ -0,0 +1,22 @@ +### ClickHouse release v20.6.4.44-stable FIXME as compared to v20.6.3.28-stable + +#### Improvement +* Backported in [#13919](https://github.com/ClickHouse/ClickHouse/issues/13919): Fix data race in `lgamma` function. This race was caught only in `tsan`, no side effects actually happened. [#13842](https://github.com/ClickHouse/ClickHouse/pull/13842) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#13560](https://github.com/ClickHouse/ClickHouse/issues/13560): Fix access to redis dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13507](https://github.com/ClickHouse/ClickHouse/issues/13507): Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Backported in [#13357](https://github.com/ClickHouse/ClickHouse/issues/13357): AvroConfluent: Skip Kafka tombstone records AvroConfluent: Support skipping broken records ... [#13203](https://github.com/ClickHouse/ClickHouse/pull/13203) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Backported in [#13350](https://github.com/ClickHouse/ClickHouse/issues/13350): Return passed number for numbers with MSB set in roundUpToPowerOfTwoOrZero(). [#13234](https://github.com/ClickHouse/ClickHouse/pull/13234) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13479](https://github.com/ClickHouse/ClickHouse/issues/13479): Fix queries with constant columns and `ORDER BY` prefix of primary key. [#13396](https://github.com/ClickHouse/ClickHouse/pull/13396) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13487](https://github.com/ClickHouse/ClickHouse/issues/13487): Fix empty output for `Arrow` and `Parquet` formats in case if query return zero rows. It was done because empty output is not valid for this formats. [#13399](https://github.com/ClickHouse/ClickHouse/pull/13399) ([hcz](https://github.com/hczhcz)). +* Backported in [#13524](https://github.com/ClickHouse/ClickHouse/issues/13524): Fix possible race in `StorageMemory`. https://clickhouse-test-reports.s3.yandex.net/0/9cac8a7244063d2092ad25d45502611e18d3749c/stress_test_(thread)/stderr.log Have no idea how to write a test. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13483](https://github.com/ClickHouse/ClickHouse/issues/13483): Fix invalid return type for comparison of tuples with `NULL` elements. Fixes [#12461](https://github.com/ClickHouse/ClickHouse/issues/12461). [#13420](https://github.com/ClickHouse/ClickHouse/pull/13420) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#13462](https://github.com/ClickHouse/ClickHouse/issues/13462): Fix error in `parseDateTimeBestEffort` function when unix timestamp was passed as an argument. This fixes [#13362](https://github.com/ClickHouse/ClickHouse/issues/13362). [#13441](https://github.com/ClickHouse/ClickHouse/pull/13441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13552](https://github.com/ClickHouse/ClickHouse/issues/13552): Fix secondary indices corruption in compact parts. [#13538](https://github.com/ClickHouse/ClickHouse/pull/13538) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13662](https://github.com/ClickHouse/ClickHouse/issues/13662): Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#13720](https://github.com/ClickHouse/ClickHouse/issues/13720): Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#13703](https://github.com/ClickHouse/ClickHouse/issues/13703): Do not optimize any(arrayJoin()) -> arrayJoin() under optimize_move_functions_out_of_any. [#13681](https://github.com/ClickHouse/ClickHouse/pull/13681) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13733](https://github.com/ClickHouse/ClickHouse/issues/13733): Fix incorrect message in `clickhouse-server.init` while checking user and group. [#13711](https://github.com/ClickHouse/ClickHouse/pull/13711) ([ylchou](https://github.com/ylchou)). +* Backported in [#13902](https://github.com/ClickHouse/ClickHouse/issues/13902): Fix incorrect sorting for `FixedString` columns. Fixes [#13182](https://github.com/ClickHouse/ClickHouse/issues/13182). 
[#13887](https://github.com/ClickHouse/ClickHouse/pull/13887) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v20.6.5.8-stable.md b/docs/changelogs/v20.6.5.8-stable.md new file mode 100644 index 00000000000..16462de13c2 --- /dev/null +++ b/docs/changelogs/v20.6.5.8-stable.md @@ -0,0 +1,33 @@ +### ClickHouse release v20.6.5.8-stable FIXME as compared to v20.6.4.44-stable + +#### Performance Improvement +* Backported in [#14189](https://github.com/ClickHouse/ClickHouse/issues/14189): Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* Backported in [#13950](https://github.com/ClickHouse/ClickHouse/issues/13950): Fix wrong error for long queries. It was possible to get syntax error other than `Max query size exceeded` for correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#14090](https://github.com/ClickHouse/ClickHouse/issues/14090): Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)). +* Backported in [#13610](https://github.com/ClickHouse/ClickHouse/issues/13610): Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13413](https://github.com/ClickHouse/ClickHouse/issues/13413): Fixed the deadlock in textlog. It is a part of [#12339](https://github.com/ClickHouse/ClickHouse/issues/12339). This fixes [#12325](https://github.com/ClickHouse/ClickHouse/issues/12325). 
[#13386](https://github.com/ClickHouse/ClickHouse/pull/13386) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#13537](https://github.com/ClickHouse/ClickHouse/issues/13537): Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). [#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14016](https://github.com/ClickHouse/ClickHouse/issues/14016): Fix premature `ON CLUSTER` timeouts for queries that must be executed on a single replica. Fixes [#6704](https://github.com/ClickHouse/ClickHouse/issues/6704), Fixes [#7228](https://github.com/ClickHouse/ClickHouse/issues/7228), Fixes [#13361](https://github.com/ClickHouse/ClickHouse/issues/13361), Fixes [#11884](https://github.com/ClickHouse/ClickHouse/issues/11884). [#13450](https://github.com/ClickHouse/ClickHouse/pull/13450) ([alesapin](https://github.com/alesapin)). +* Backported in [#14000](https://github.com/ClickHouse/ClickHouse/issues/14000): Fixed the behaviour when sometimes cache-dictionary returned default value instead of present value from source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14108](https://github.com/ClickHouse/ClickHouse/issues/14108): Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#14081](https://github.com/ClickHouse/ClickHouse/issues/14081): Fixes /replicas_status endpoint response status code when verbose=1. [#13722](https://github.com/ClickHouse/ClickHouse/pull/13722) ([javi santana](https://github.com/javisantana)). +* Backported in [#13776](https://github.com/ClickHouse/ClickHouse/issues/13776): Fix logging Settings.Names/Values when log_queries_min_type > QUERY_START. [#13737](https://github.com/ClickHouse/ClickHouse/pull/13737) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13772](https://github.com/ClickHouse/ClickHouse/issues/13772): Fix race condition between DETACH and background merges. Parts may revive after detach. This is continuation of [#8602](https://github.com/ClickHouse/ClickHouse/issues/8602) that did not fix the issue but introduced a test that started to fail in very rare cases, demonstrating the issue. [#13746](https://github.com/ClickHouse/ClickHouse/pull/13746) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13788](https://github.com/ClickHouse/ClickHouse/issues/13788): Add range check for h3KRing function. This fixes [#13633](https://github.com/ClickHouse/ClickHouse/issues/13633). [#13752](https://github.com/ClickHouse/ClickHouse/pull/13752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13801](https://github.com/ClickHouse/ClickHouse/issues/13801): Fix step overflow in range(). [#13790](https://github.com/ClickHouse/ClickHouse/pull/13790) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13810](https://github.com/ClickHouse/ClickHouse/issues/13810): Fix reading from MergeTree table with INDEX of type SET fails when comparing against NULL. This fixes [#13686](https://github.com/ClickHouse/ClickHouse/issues/13686). [#13793](https://github.com/ClickHouse/ClickHouse/pull/13793) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#13833](https://github.com/ClickHouse/ClickHouse/issues/13833): Fix topK/topKWeighted merge (with non-default parameters). [#13817](https://github.com/ClickHouse/ClickHouse/pull/13817) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13974](https://github.com/ClickHouse/ClickHouse/issues/13974): Fixed potential deadlock when renaming `Distributed` table. [#13922](https://github.com/ClickHouse/ClickHouse/pull/13922) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14061](https://github.com/ClickHouse/ClickHouse/issues/14061): Fix wrong results in select queries with `DISTINCT` keyword in case `optimize_duplicate_order_by_and_distinct` setting is enabled. [#13925](https://github.com/ClickHouse/ClickHouse/pull/13925) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#14076](https://github.com/ClickHouse/ClickHouse/issues/14076): Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14138](https://github.com/ClickHouse/ClickHouse/issues/14138): Fix pointInPolygon with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)). +* Backported in [#14172](https://github.com/ClickHouse/ClickHouse/issues/14172): Fix creation of tables with named tuples. This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). [#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14294](https://github.com/ClickHouse/ClickHouse/issues/14294): Fix crash when INSERT INTO Kafka engine table from an empty result set with a subquery. ... [#14203](https://github.com/ClickHouse/ClickHouse/pull/14203) ([Dongdong Yang](https://github.com/donge)).
+* Backported in [#14243](https://github.com/ClickHouse/ClickHouse/issues/14243): Fixed incorrect sorting order if LowCardinality column. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14306](https://github.com/ClickHouse/ClickHouse/issues/14306): Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861). [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#14340](https://github.com/ClickHouse/ClickHouse/issues/14340): Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.6.6.7-stable.md b/docs/changelogs/v20.6.6.7-stable.md new file mode 100644 index 00000000000..5dfc65eed36 --- /dev/null +++ b/docs/changelogs/v20.6.6.7-stable.md @@ -0,0 +1,13 @@ +### ClickHouse release v20.6.6.7-stable FIXME as compared to v20.6.5.8-stable + +#### Improvement +* Backported in [#14359](https://github.com/ClickHouse/ClickHouse/issues/14359): Added Redis requirepass authorization. [#13688](https://github.com/ClickHouse/ClickHouse/pull/13688) ([Ivan Torgashov](https://github.com/it1804)). + +#### Bug Fix +* Backported in [#14365](https://github.com/ClickHouse/ClickHouse/issues/14365): Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#14540](https://github.com/ClickHouse/ClickHouse/issues/14540): Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hangs. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#14488](https://github.com/ClickHouse/ClickHouse/issues/14488): Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)). +* Backported in [#14483](https://github.com/ClickHouse/ClickHouse/issues/14483): Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14598](https://github.com/ClickHouse/ClickHouse/issues/14598): Fix rare segfaults in functions with combinator -Resample, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#14667](https://github.com/ClickHouse/ClickHouse/issues/14667): Fix wrong Decimal multiplication result caused by wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
+ diff --git a/docs/changelogs/v20.6.7.4-stable.md b/docs/changelogs/v20.6.7.4-stable.md new file mode 100644 index 00000000000..f1ad463ae0d --- /dev/null +++ b/docs/changelogs/v20.6.7.4-stable.md @@ -0,0 +1,9 @@ +### ClickHouse release v20.6.7.4-stable FIXME as compared to v20.6.6.7-stable + +#### Bug Fix +* Backported in [#14726](https://github.com/ClickHouse/ClickHouse/issues/14726): Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#14805](https://github.com/ClickHouse/ClickHouse/issues/14805): Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Backported in [#14723](https://github.com/ClickHouse/ClickHouse/issues/14723): Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14913](https://github.com/ClickHouse/ClickHouse/issues/14913): Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14945](https://github.com/ClickHouse/ClickHouse/issues/14945): Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). 
[#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.6.8.5-stable.md b/docs/changelogs/v20.6.8.5-stable.md new file mode 100644 index 00000000000..530e4958d34 --- /dev/null +++ b/docs/changelogs/v20.6.8.5-stable.md @@ -0,0 +1,32 @@ +### ClickHouse release v20.6.8.5-stable FIXME as compared to v20.6.7.4-stable + +#### Improvement +* Backported in [#15567](https://github.com/ClickHouse/ClickHouse/issues/15567): Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix +* Backported in [#15018](https://github.com/ClickHouse/ClickHouse/issues/15018): Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14826](https://github.com/ClickHouse/ClickHouse/issues/14826): Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15147](https://github.com/ClickHouse/ClickHouse/issues/15147): Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). 
+* Backported in [#15253](https://github.com/ClickHouse/ClickHouse/issues/15253): Fixed segfault in CacheDictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14986](https://github.com/ClickHouse/ClickHouse/issues/14986): Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#14968](https://github.com/ClickHouse/ClickHouse/issues/14968): Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Backported in [#15080](https://github.com/ClickHouse/ClickHouse/issues/15080): Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15057](https://github.com/ClickHouse/ClickHouse/issues/15057): If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15050](https://github.com/ClickHouse/ClickHouse/issues/15050): We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333).
This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15143](https://github.com/ClickHouse/ClickHouse/issues/15143): Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15223](https://github.com/ClickHouse/ClickHouse/issues/15223): Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Backported in [#15404](https://github.com/ClickHouse/ClickHouse/issues/15404): Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15279](https://github.com/ClickHouse/ClickHouse/issues/15279): Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15412](https://github.com/ClickHouse/ClickHouse/issues/15412): Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hung forever.
[#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15339](https://github.com/ClickHouse/ClickHouse/issues/15339): Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Backported in [#15335](https://github.com/ClickHouse/ClickHouse/issues/15335): Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Backported in [#15445](https://github.com/ClickHouse/ClickHouse/issues/15445): Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Backported in [#15508](https://github.com/ClickHouse/ClickHouse/issues/15508): Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Backported in [#15616](https://github.com/ClickHouse/ClickHouse/issues/15616): Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Backported in [#15561](https://github.com/ClickHouse/ClickHouse/issues/15561): Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#15728](https://github.com/ClickHouse/ClickHouse/issues/15728): Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15654](https://github.com/ClickHouse/ClickHouse/issues/15654): Fix 'Database doesn't exist.' in queries with IN and Distributed table when there's no database on initiator. [#15538](https://github.com/ClickHouse/ClickHouse/pull/15538) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15586](https://github.com/ClickHouse/ClickHouse/issues/15586): Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15664](https://github.com/ClickHouse/ClickHouse/issues/15664): Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15714](https://github.com/ClickHouse/ClickHouse/issues/15714): Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ diff --git a/docs/changelogs/v20.6.9.1-stable.md b/docs/changelogs/v20.6.9.1-stable.md new file mode 100644 index 00000000000..f5860bcb3ee --- /dev/null +++ b/docs/changelogs/v20.6.9.1-stable.md @@ -0,0 +1,2 @@ +### ClickHouse release v20.6.9.1-stable FIXME as compared to v20.6.8.5-stable + diff --git a/docs/changelogs/v20.7.1.4310-prestable.md b/docs/changelogs/v20.7.1.4310-prestable.md new file mode 100644 index 00000000000..a941e071fc6 --- /dev/null +++ b/docs/changelogs/v20.7.1.4310-prestable.md @@ -0,0 +1,170 @@ +### ClickHouse release v20.7.1.4310-prestable FIXME as compared to v20.6.1.4066-prestable + +#### Backward Incompatible Change +* Deprecate special printing of zero Date/DateTime values as `0000-00-00` and `0000-00-00 00:00:00`. [#12442](https://github.com/ClickHouse/ClickHouse/pull/12442) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Function `modulo` (operator `%`) with at least one floating point number as argument will calculate remainder of division directly on floating point numbers without converting both arguments to integers. It makes behaviour compatible with most of DBMS. This also applicable for Date and DateTime data types. Added alias `mod`. This closes [#7323](https://github.com/ClickHouse/ClickHouse/issues/7323). [#12585](https://github.com/ClickHouse/ClickHouse/pull/12585) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* - Added support of LDAP authentication for preconfigured users ("Simple Bind" method). [#11234](https://github.com/ClickHouse/ClickHouse/pull/11234) ([Denis Glazachev](https://github.com/traceon)). +* Add mapAdd and mapSubtract functions for working with tuple maps. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added `system.crash_log` table into which stack traces for fatal errors are collected. 
[#12316](https://github.com/ClickHouse/ClickHouse/pull/12316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `bayesAB` function for bayesian-ab-testing. [#12327](https://github.com/ClickHouse/ClickHouse/pull/12327) ([achimbab](https://github.com/achimbab)). +* Integration with [COS](https://intl.cloud.tencent.com/product/cos). [#12386](https://github.com/ClickHouse/ClickHouse/pull/12386) ([fastio](https://github.com/fastio)). +* Add FROM_UNIXTIME function, related to [#12149](https://github.com/ClickHouse/ClickHouse/issues/12149). [#12484](https://github.com/ClickHouse/ClickHouse/pull/12484) ([flynn](https://github.com/ucasfl)). +* A function `formatRow` is added to support turning arbitrary expressions into a string via given format. It's useful for manipulating SQL outputs and is quite versatile combined with the `columns` function. [#12574](https://github.com/ClickHouse/ClickHouse/pull/12574) ([Amos Bird](https://github.com/amosbird)). +* Add setting `allow_non_metadata_alters` which restricts execution of `ALTER` queries that modify data on disk. Disabled by default. Closes [#11547](https://github.com/ClickHouse/ClickHouse/issues/11547). [#12635](https://github.com/ClickHouse/ClickHouse/pull/12635) ([alesapin](https://github.com/alesapin)). +* Add `minMap` and `maxMap` functions support to `SimpleAggregateFunction`. [#12662](https://github.com/ClickHouse/ClickHouse/pull/12662) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added http headers `X-ClickHouse-Database` and `X-ClickHouse-Format` which may be used to set default database and output format. [#12981](https://github.com/ClickHouse/ClickHouse/pull/12981) ([hcz](https://github.com/hczhcz)). +* Implement user-defined settings. [#13013](https://github.com/ClickHouse/ClickHouse/pull/13013) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Performance Improvement +* Optimize PK lookup for queries that match exact PK range.
[#12277](https://github.com/ClickHouse/ClickHouse/pull/12277) ([Ivan Babrou](https://github.com/bobrik)). +* Attempt to implement streaming optimization in `DiskS3`. [#12434](https://github.com/ClickHouse/ClickHouse/pull/12434) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Replaces monotonous functions with its argument in ORDER BY if `set optimize_monotonous_functions_in_order_by=1`. [#12467](https://github.com/ClickHouse/ClickHouse/pull/12467) ([Artem Zuikov](https://github.com/4ertus2)). +* Converts String-type arguments of function "if" and "transform" into enum if `set optimize_if_transform_strings_to_enum = 1`. [#12515](https://github.com/ClickHouse/ClickHouse/pull/12515) ([Artem Zuikov](https://github.com/4ertus2)). +* Parallel PK lookup and skipping index stages on parts, as described in [#11564](https://github.com/ClickHouse/ClickHouse/issues/11564). [#12589](https://github.com/ClickHouse/ClickHouse/pull/12589) ([Ivan Babrou](https://github.com/bobrik)). +* Push down `LIMIT` step for query plan. [#13016](https://github.com/ClickHouse/ClickHouse/pull/13016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Slightly improve performance of aggregation by UInt8/UInt16 keys. [#13055](https://github.com/ClickHouse/ClickHouse/pull/13055) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly improve performance of aggregation by UInt8/UInt16 keys. [#13091](https://github.com/ClickHouse/ClickHouse/pull/13091) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Adds a new type of polygon dictionary which uses a recursively built grid to reduce the number of polygons which need to be checked for each point. [#9278](https://github.com/ClickHouse/ClickHouse/pull/9278) ([achulkov2](https://github.com/achulkov2)). +* Allow TabSeparatedRaw as an input format. [#12009](https://github.com/ClickHouse/ClickHouse/pull/12009) ([hcz](https://github.com/hczhcz)). 
+* Separated `AWSAuthV4Signer` into different logger, removed "AWSClient: AWSClient". [#12320](https://github.com/ClickHouse/ClickHouse/pull/12320) ([Vladimir Chebotarev](https://github.com/excitoon)). +* - Implement `RENAME DATABASE` and `RENAME DICTIONARY` for `Atomic` database engine - Add implicit `{uuid}` macro, which can be used in ZooKeeper path for `ReplicatedMergeTree`. It works with `CREATE ... ON CLUSTER ...` queries. Set `show_table_uuid_in_table_create_query_if_not_nil` to `true` to use it. - Make `ReplicatedMergeTree` engine arguments optional, `/clickhouse/tables/{uuid}/{shard}/` and `{replica}` are used by default. Closes [#12135](https://github.com/ClickHouse/ClickHouse/issues/12135). - Minor fixes. - These changes break backward compatibility of `Atomic` database engine. Previously created `Atomic` databases must be manually converted to new format. [#12343](https://github.com/ClickHouse/ClickHouse/pull/12343) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Now joinGet supports multi-key lookup. [#12418](https://github.com/ClickHouse/ClickHouse/pull/12418) ([Amos Bird](https://github.com/amosbird)). +* Rollback insertion errors in `Log`, `TinyLog`, `StripeLog` engines. In previous versions an insertion error led to an inconsistent table state (this works as documented and it is normal for these table engines). This fixes [#12402](https://github.com/ClickHouse/ClickHouse/issues/12402). [#12426](https://github.com/ClickHouse/ClickHouse/pull/12426) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid overflow in parsing of DateTime values that will lead to negative unix timestamp in their timezone (for example, `1970-01-01 00:00:00` in Moscow). Saturate to zero instead. This fixes [#3470](https://github.com/ClickHouse/ClickHouse/issues/3470). This fixes [#4172](https://github.com/ClickHouse/ClickHouse/issues/4172).
[#12443](https://github.com/ClickHouse/ClickHouse/pull/12443) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add several metrics for requests to S3 storages. [#12464](https://github.com/ClickHouse/ClickHouse/pull/12464) ([ianton-ru](https://github.com/ianton-ru)). +* Changes default value for `multiple_joins_rewriter_version` to 2. It enables new multiple joins rewriter that knows about column names. [#12469](https://github.com/ClickHouse/ClickHouse/pull/12469) ([Artem Zuikov](https://github.com/4ertus2)). +* Allow to set JOIN kind and type in a more standard way: `LEFT SEMI JOIN` instead of `SEMI LEFT JOIN`. For now both are correct. [#12520](https://github.com/ClickHouse/ClickHouse/pull/12520) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix explain query format overwrite by default, issue [#12432](https://github.com/ClickHouse/ClickHouse/issues/12432). [#12541](https://github.com/ClickHouse/ClickHouse/pull/12541) ([BohuTANG](https://github.com/BohuTANG)). +* Add SelectedRows and SelectedBytes events. [#12638](https://github.com/ClickHouse/ClickHouse/pull/12638) ([ianton-ru](https://github.com/ianton-ru)). +* Add `current_database` information to `system.query_log`. [#12652](https://github.com/ClickHouse/ClickHouse/pull/12652) ([Amos Bird](https://github.com/amosbird)). +* Support truncate table without table keyword. [#12653](https://github.com/ClickHouse/ClickHouse/pull/12653) ([Winter Zhang](https://github.com/zhang2014)). +* Now exceptions are forwarded to the client if an error happened during ALTER or mutation. Closes [#11329](https://github.com/ClickHouse/ClickHouse/issues/11329). [#12666](https://github.com/ClickHouse/ClickHouse/pull/12666) ([alesapin](https://github.com/alesapin)). +* Protect from the cases when user may set `background_pool_size` to a value lower than `number_of_free_entries_in_pool_to_execute_mutation` or `number_of_free_entries_in_pool_to_lower_max_size_of_merge`.
In these cases ALTERs won't work or the maximum size of merge will be too limited. Saturate values instead. This closes [#10897](https://github.com/ClickHouse/ClickHouse/issues/10897). [#12728](https://github.com/ClickHouse/ClickHouse/pull/12728) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This change makes skipping index efficiency more obvious by showing both skipped and total examined granules. [#12754](https://github.com/ClickHouse/ClickHouse/pull/12754) ([Ivan Babrou](https://github.com/bobrik)). +* This change makes skipping index efficiency more obvious by showing total marks before and after skipping indices. [#12755](https://github.com/ClickHouse/ClickHouse/pull/12755) ([Ivan Babrou](https://github.com/bobrik)). +* Introduce setting `alter_partition_verbose_result` which outputs information about touched parts for some types of `ALTER TABLE ... PARTITION ...` queries (currently `ATTACH` and `FREEZE`). Closes [#8076](https://github.com/ClickHouse/ClickHouse/issues/8076). [#13017](https://github.com/ClickHouse/ClickHouse/pull/13017) ([alesapin](https://github.com/alesapin)). +* Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQueryTimeMicroseconds to system.events. [#13028](https://github.com/ClickHouse/ClickHouse/pull/13028) ([ianton-ru](https://github.com/ianton-ru)). +* break-ing out of a loop because it makes sense to do so. [#13058](https://github.com/ClickHouse/ClickHouse/pull/13058) ([Mark Papadakis](https://github.com/markpapadakis)). +* Keep less amount of logs in ZooKeeper. Avoid too large growth of ZooKeeper nodes in case of offline replicas when having many servers/tables/inserts. [#13100](https://github.com/ClickHouse/ClickHouse/pull/13100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add sanity check for MergeTree settings. If the settings are incorrect, the server will refuse to start or to create a table, printing detailed explanation to the user. 
[#13153](https://github.com/ClickHouse/ClickHouse/pull/13153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow *Map aggregate functions to work on Arrays with NULLs. Fixes [#13157](https://github.com/ClickHouse/ClickHouse/issues/13157). [#13225](https://github.com/ClickHouse/ClickHouse/pull/13225) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix assert in parseDateTimeBestEffort. This fixes [#12649](https://github.com/ClickHouse/ClickHouse/issues/12649). [#13227](https://github.com/ClickHouse/ClickHouse/pull/13227) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix assert in geohashesInBox. This fixes [#12554](https://github.com/ClickHouse/ClickHouse/issues/12554). [#13229](https://github.com/ClickHouse/ClickHouse/pull/13229) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now broken parts are also reported when encountered in compact part processing. [#13282](https://github.com/ClickHouse/ClickHouse/pull/13282) ([Amos Bird](https://github.com/amosbird)). +* Fix a 'Week'-interval formatting for ATTACH/ALTER/CREATE QUOTA-statements. [#13417](https://github.com/ClickHouse/ClickHouse/pull/13417) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Updated gitignore-files. [#13447](https://github.com/ClickHouse/ClickHouse/pull/13447) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). + +#### Bug Fix +* kafka: fix SIGSEGV if there is a message with error in the middle of the batch. [#12302](https://github.com/ClickHouse/ClickHouse/pull/12302) ([Azat Khuzhin](https://github.com/azat)). +* If MergeTree table does not contain ORDER BY or PARTITION BY, it was possible to request ALTER to CLEAR all the columns and ALTER will get stuck. Fixed [#7941](https://github.com/ClickHouse/ClickHouse/issues/7941). [#12382](https://github.com/ClickHouse/ClickHouse/pull/12382) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* SystemLog: do not write to ordinary server log under mutex. This can lead to deadlock if `text_log` is enabled. [#12452](https://github.com/ClickHouse/ClickHouse/pull/12452) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in JOIN with dictionary when we are joining over expression of dictionary key: `t JOIN dict ON expr(dict.id) = t.id`. Disable dictionary join optimisation for this case. [#12458](https://github.com/ClickHouse/ClickHouse/pull/12458) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix SETTINGS parse after FORMAT. [#12480](https://github.com/ClickHouse/ClickHouse/pull/12480) ([Azat Khuzhin](https://github.com/azat)). +* Fix backwards compatibility in binary format of `AggregateFunction(avg, ...)` values. This fixes [#12342](https://github.com/ClickHouse/ClickHouse/issues/12342). [#12486](https://github.com/ClickHouse/ClickHouse/pull/12486) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed performance issue, while reading from compact parts. [#12492](https://github.com/ClickHouse/ClickHouse/pull/12492) ([Anton Popov](https://github.com/CurtizJ)). +* Fixing race condition in live view tables which could cause data duplication. [#12519](https://github.com/ClickHouse/ClickHouse/pull/12519) ([vzakaznikov](https://github.com/vzakaznikov)). +* Better exception for function `in` with invalid number of arguments. [#12529](https://github.com/ClickHouse/ClickHouse/pull/12529) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bug which lead to broken old parts after `ALTER DELETE` query when `enable_mixed_granularity_parts=1`. Fixes [#12536](https://github.com/ClickHouse/ClickHouse/issues/12536). [#12543](https://github.com/ClickHouse/ClickHouse/pull/12543) ([alesapin](https://github.com/alesapin)). +* Now ClickHouse will recalculate checksums for parts when file `checksums.txt` is absent. Broken since [#9827](https://github.com/ClickHouse/ClickHouse/issues/9827). 
[#12545](https://github.com/ClickHouse/ClickHouse/pull/12545) ([alesapin](https://github.com/alesapin)). +* Remove data for Distributed tables (blocks from async INSERTs) on DROP TABLE. [#12556](https://github.com/ClickHouse/ClickHouse/pull/12556) ([Azat Khuzhin](https://github.com/azat)). +* Fix race condition in external dictionaries with cache layout which can lead to a server crash. [#12566](https://github.com/ClickHouse/ClickHouse/pull/12566) ([alesapin](https://github.com/alesapin)). +* Fix lack of aliases with function `any`. [#12593](https://github.com/ClickHouse/ClickHouse/pull/12593) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Output of TreeExecutor is not sorted` for `OPTIMIZE DEDUPLICATE`. Fixes [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572). [#12613](https://github.com/ClickHouse/ClickHouse/pull/12613) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible `Pipeline stuck` error for queries with external sorting. Fixes [#12617](https://github.com/ClickHouse/ClickHouse/issues/12617). [#12618](https://github.com/ClickHouse/ClickHouse/pull/12618) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The function `groupArrayMoving*` was not working for distributed queries. Its result was calculated within incorrect data type (without promotion to the largest type). The function `groupArrayMovingAvg` was returning integer number that was inconsistent with the `avg` function. This fixes [#12568](https://github.com/ClickHouse/ClickHouse/issues/12568). [#12622](https://github.com/ClickHouse/ClickHouse/pull/12622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error message about adaptive granularity. [#12624](https://github.com/ClickHouse/ClickHouse/pull/12624) ([alesapin](https://github.com/alesapin)). +* Better exception message in disk access storage. [#12625](https://github.com/ClickHouse/ClickHouse/pull/12625) ([alesapin](https://github.com/alesapin)).
+* Exception `There is no supertype...` can be thrown during `ALTER ... UPDATE` in unexpected cases (e.g. when subtracting from UInt64 column). This fixes [#7306](https://github.com/ClickHouse/ClickHouse/issues/7306). This fixes [#4165](https://github.com/ClickHouse/ClickHouse/issues/4165). [#12633](https://github.com/ClickHouse/ClickHouse/pull/12633) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CREATE USER IF NOT EXISTS now doesn't throw exception if the user exists. This fixes [#12507](https://github.com/ClickHouse/ClickHouse/issues/12507). [#12646](https://github.com/ClickHouse/ClickHouse/pull/12646) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add support for function `if` with `Array(UUID)` arguments. This fixes [#11066](https://github.com/ClickHouse/ClickHouse/issues/11066). [#12648](https://github.com/ClickHouse/ClickHouse/pull/12648) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix SIGSEGV in StorageKafka when broker is unavailable (and not only). [#12658](https://github.com/ClickHouse/ClickHouse/pull/12658) ([Azat Khuzhin](https://github.com/azat)). +* fixes [#10572](https://github.com/ClickHouse/ClickHouse/issues/10572) fix bloom filter index with const expression. [#12659](https://github.com/ClickHouse/ClickHouse/pull/12659) ([Winter Zhang](https://github.com/zhang2014)). +* fixes [#12293](https://github.com/ClickHouse/ClickHouse/issues/12293) allow push predicate when subquery contains with clause. [#12663](https://github.com/ClickHouse/ClickHouse/pull/12663) ([Winter Zhang](https://github.com/zhang2014)). +* Fix optimization `optimize_move_functions_out_of_any=1` in case of `any(func())`. [#12664](https://github.com/ClickHouse/ClickHouse/pull/12664) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix memory tracking for input_format_parallel_parsing (by attaching thread to group). [#12672](https://github.com/ClickHouse/ClickHouse/pull/12672) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix performance with large tuples, which are interpreted as functions in `IN` section. The case when user write `WHERE x IN tuple(1, 2, ...)` instead of `WHERE x IN (1, 2, ...)` for some obscure reason. [#12700](https://github.com/ClickHouse/ClickHouse/pull/12700) ([Anton Popov](https://github.com/CurtizJ)). +* Fix CAST(Nullable(String), Enum()). [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare bug when `ALTER DELETE` and `ALTER MODIFY COLUMN` queries executed simultaneously as a single mutation. Bug leads to an incorrect amount of rows in `count.txt` and as a consequence incorrect data in part. Also, fix a small bug with simultaneous `ALTER RENAME COLUMN` and `ALTER ADD COLUMN`. [#12760](https://github.com/ClickHouse/ClickHouse/pull/12760) ([alesapin](https://github.com/alesapin)). +* Fix unnecessary limiting for the number of threads for selects from local replica. [#12840](https://github.com/ClickHouse/ClickHouse/pull/12840) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix columns duplication for range hashed dictionary created from DDL query. This fixes [#10605](https://github.com/ClickHouse/ClickHouse/issues/10605). [#12857](https://github.com/ClickHouse/ClickHouse/pull/12857) ([alesapin](https://github.com/alesapin)). +* Corrected merge_with_ttl_timeout logic which did not work well when expiration affected more than one partition over one time interval. (Authored by @excitoon). [#12982](https://github.com/ClickHouse/ClickHouse/pull/12982) ([Alexander Kazakov](https://github.com/Akazz)). +* Fix `Block structure mismatch` error for queries with `UNION` and `JOIN`. Fixes [#12602](https://github.com/ClickHouse/ClickHouse/issues/12602). [#12989](https://github.com/ClickHouse/ClickHouse/pull/12989) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash which was possible for queries with `ORDER BY` tuple and small `LIMIT`. 
Fixes [#12623](https://github.com/ClickHouse/ClickHouse/issues/12623). [#13009](https://github.com/ClickHouse/ClickHouse/pull/13009) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add logging when the mutation is not running because of limited disk space or free threads in the background pool. [#13068](https://github.com/ClickHouse/ClickHouse/pull/13068) ([alesapin](https://github.com/alesapin)). +* Fix error `Cannot convert column because it is constant but values of constants are different in source and result` for remote queries which use deterministic functions in scope of query, but not deterministic between queries, like `now()`, `now64()`, `randConstant()`. Fixes [#11327](https://github.com/ClickHouse/ClickHouse/issues/11327). [#13075](https://github.com/ClickHouse/ClickHouse/pull/13075) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong index analysis with functions. It could lead to pruning wrong parts, while reading from `MergeTree` tables. Fixes [#13060](https://github.com/ClickHouse/ClickHouse/issues/13060). Fixes [#12406](https://github.com/ClickHouse/ClickHouse/issues/12406). [#13081](https://github.com/ClickHouse/ClickHouse/pull/13081) ([Anton Popov](https://github.com/CurtizJ)). +* Fix segfault when mutation killed and the server tries to send the exception to the client. [#13169](https://github.com/ClickHouse/ClickHouse/pull/13169) ([alesapin](https://github.com/alesapin)). +* AvroConfluent: Skip Kafka tombstone records AvroConfluent: Support skipping broken records ... [#13203](https://github.com/ClickHouse/ClickHouse/pull/13203) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix DateTime64 conversion functions with constant argument. [#13205](https://github.com/ClickHouse/ClickHouse/pull/13205) ([Azat Khuzhin](https://github.com/azat)). +* Fix assert in `arrayElement` function in case of array elements are Nullable and array subscript is also Nullable. 
This fixes [#12172](https://github.com/ClickHouse/ClickHouse/issues/12172). [#13224](https://github.com/ClickHouse/ClickHouse/pull/13224) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix function if with nullable constexpr as cond that is not literal NULL. Fixes [#12463](https://github.com/ClickHouse/ClickHouse/issues/12463). [#13226](https://github.com/ClickHouse/ClickHouse/pull/13226) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Return passed number for numbers with MSB set in roundUpToPowerOfTwoOrZero(). [#13234](https://github.com/ClickHouse/ClickHouse/pull/13234) ([Azat Khuzhin](https://github.com/azat)). +* Fix assertion in KeyCondition when primary key contains expression with monotonic function and query contains comparison with constant whose type is different. This fixes [#12465](https://github.com/ClickHouse/ClickHouse/issues/12465). [#13251](https://github.com/ClickHouse/ClickHouse/pull/13251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix potentially low performance and slightly incorrect result for `uniqExact`, `topK`, `sumDistinct` and similar aggregate functions called on Float types with NaN values. It also triggered assert in debug build. This fixes [#12491](https://github.com/ClickHouse/ClickHouse/issues/12491). [#13254](https://github.com/ClickHouse/ClickHouse/pull/13254) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The server may crash if user passed specifically crafted arguments to the function `h3ToChildren`. This fixes [#13275](https://github.com/ClickHouse/ClickHouse/issues/13275). [#13277](https://github.com/ClickHouse/ClickHouse/pull/13277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Totals having transform was already added to pipeline` in case of a query from delayed replica. [#13290](https://github.com/ClickHouse/ClickHouse/pull/13290) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix crash in `LEFT ASOF JOIN` with `join_use_nulls=1`. [#13291](https://github.com/ClickHouse/ClickHouse/pull/13291) ([Artem Zuikov](https://github.com/4ertus2)). +* Throw error on `arrayJoin()` function in `JOIN ON` section. [#13330](https://github.com/ClickHouse/ClickHouse/pull/13330) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix segfault when function `groupArrayMovingSum` deserializes empty state. Fixes [#13339](https://github.com/ClickHouse/ClickHouse/issues/13339). [#13341](https://github.com/ClickHouse/ClickHouse/pull/13341) ([alesapin](https://github.com/alesapin)). +* Fixed the deadlock in textlog. It is a part of [#12339](https://github.com/ClickHouse/ClickHouse/issues/12339). This fixes [#12325](https://github.com/ClickHouse/ClickHouse/issues/12325). [#13386](https://github.com/ClickHouse/ClickHouse/pull/13386) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix PrettyCompactMonoBlock for clickhouse-local. Fix extremes/totals with PrettyCompactMonoBlock. Fixes [#7746](https://github.com/ClickHouse/ClickHouse/issues/7746). [#13394](https://github.com/ClickHouse/ClickHouse/pull/13394) ([Azat Khuzhin](https://github.com/azat)). +* Fix queries with constant columns and `ORDER BY` prefix of primary key. [#13396](https://github.com/ClickHouse/ClickHouse/pull/13396) ([Anton Popov](https://github.com/CurtizJ)). +* Fix empty output for `Arrow` and `Parquet` formats in case a query returns zero rows. It was done because empty output is not valid for these formats. [#13399](https://github.com/ClickHouse/ClickHouse/pull/13399) ([hcz](https://github.com/hczhcz)). +* Fix `aggregate function any(x) is found inside another aggregate function in query` error with `SET optimize_move_functions_out_of_any = 1` and aliases inside `any()`. [#13419](https://github.com/ClickHouse/ClickHouse/pull/13419) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix invalid return type for comparison of tuples with `NULL` elements.
Fixes [#12461](https://github.com/ClickHouse/ClickHouse/issues/12461). [#13420](https://github.com/ClickHouse/ClickHouse/pull/13420) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error in `parseDateTimeBestEffort` function when unix timestamp was passed as an argument. This fixes [#13362](https://github.com/ClickHouse/ClickHouse/issues/13362). [#13441](https://github.com/ClickHouse/ClickHouse/pull/13441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Add compiler option to control that stack frames are not too large. This will help to run the code in fibers with small stack size. [#11524](https://github.com/ClickHouse/ClickHouse/pull/11524) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rework integrational configuration paths. [#12285](https://github.com/ClickHouse/ClickHouse/pull/12285) ([Ilya Yatsishin](https://github.com/qoega)). +* Add docker image for fast tests. [#12294](https://github.com/ClickHouse/ClickHouse/pull/12294) ([alesapin](https://github.com/alesapin)). +* Update fmtlib to master (7.0.1). [#12446](https://github.com/ClickHouse/ClickHouse/pull/12446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add PEERDIR(protoc) as protobuf format parses .proto file in runtime. [#12475](https://github.com/ClickHouse/ClickHouse/pull/12475) ([Yuriy Chernyshov](https://github.com/georgthegreat)). +* Now we check that server is able to start after stress tests run. This fixes [#12473](https://github.com/ClickHouse/ClickHouse/issues/12473). [#12496](https://github.com/ClickHouse/ClickHouse/pull/12496) ([alesapin](https://github.com/alesapin)). +* Improve performance of TestKeeper. This will speedup tests with heavy usage of Replicated tables. [#12505](https://github.com/ClickHouse/ClickHouse/pull/12505) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adding RBAC syntax tests in TestFlows. 
[#12642](https://github.com/ClickHouse/ClickHouse/pull/12642) ([vzakaznikov](https://github.com/vzakaznikov)). +* Apply random query mutations (fuzzing) in stress tests. [#12734](https://github.com/ClickHouse/ClickHouse/pull/12734) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Connector-ODBC updated to mysql-connector-odbc-8.0.21. [#12739](https://github.com/ClickHouse/ClickHouse/pull/12739) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix build of test under Mac OS X. This closes [#12767](https://github.com/ClickHouse/ClickHouse/issues/12767). [#12772](https://github.com/ClickHouse/ClickHouse/pull/12772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* increasing timeouts in testflows tests. [#12949](https://github.com/ClickHouse/ClickHouse/pull/12949) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add a test for `ALTER TABLE CLEAR COLUMN` query for primary key columns. [#12951](https://github.com/ClickHouse/ClickHouse/pull/12951) ([alesapin](https://github.com/alesapin)). +* Do not build helper_container image inside integrational tests. Build docker container in CI and use pre-built helper_container in integration tests. [#12953](https://github.com/ClickHouse/ClickHouse/pull/12953) ([Ilya Yatsishin](https://github.com/qoega)). +* Check an ability that we able to restore the backup from an old version to the new version. This closes [#8979](https://github.com/ClickHouse/ClickHouse/issues/8979). [#12959](https://github.com/ClickHouse/ClickHouse/pull/12959) ([alesapin](https://github.com/alesapin)). +* Fix MSan error in "rdkafka" library. This closes [#12990](https://github.com/ClickHouse/ClickHouse/issues/12990). Updated `rdkafka` to version 1.5 (master). [#12991](https://github.com/ClickHouse/ClickHouse/pull/12991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rerun some tests in fast test check. [#12992](https://github.com/ClickHouse/ClickHouse/pull/12992) ([alesapin](https://github.com/alesapin)). 
+* Adding extra xfails for some ldap tests. [#13054](https://github.com/ClickHouse/ClickHouse/pull/13054) ([vzakaznikov](https://github.com/vzakaznikov)). +* Added tests for RBAC functionality of `SELECT` privilege in TestFlows. [#13061](https://github.com/ClickHouse/ClickHouse/pull/13061) ([Ritaank Tiwari](https://github.com/ritaank)). +* Rewrote Function tests to gtest. Removed useless includes from tests. [#13073](https://github.com/ClickHouse/ClickHouse/pull/13073) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improve cache purge in documentation deploy script. [#13107](https://github.com/ClickHouse/ClickHouse/pull/13107) ([alesapin](https://github.com/alesapin)). +* Fixing 00960_live_view_watch_events_live.py test. [#13108](https://github.com/ClickHouse/ClickHouse/pull/13108) ([vzakaznikov](https://github.com/vzakaznikov)). +* Small fixes to the RBAC SRS. [#13152](https://github.com/ClickHouse/ClickHouse/pull/13152) ([vzakaznikov](https://github.com/vzakaznikov)). +* Even more retries in zkutil gtest to prevent test flakiness. [#13165](https://github.com/ClickHouse/ClickHouse/pull/13165) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add script which set labels for pull requests in GitHub hook. [#13183](https://github.com/ClickHouse/ClickHouse/pull/13183) ([alesapin](https://github.com/alesapin)). +* Use `shellcheck` for sh tests linting. [#13200](https://github.com/ClickHouse/ClickHouse/pull/13200) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use `shellcheck` for sh tests linting. [#13207](https://github.com/ClickHouse/ClickHouse/pull/13207) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix libunwind build in AArch64. This fixes [#13204](https://github.com/ClickHouse/ClickHouse/issues/13204). [#13208](https://github.com/ClickHouse/ClickHouse/pull/13208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Function `materialize` (the function for ClickHouse testing) will work for NULL as expected - by transforming it to non-constant column. [#13212](https://github.com/ClickHouse/ClickHouse/pull/13212) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now fast test will wait server with retries. [#13284](https://github.com/ClickHouse/ClickHouse/pull/13284) ([alesapin](https://github.com/alesapin)). +* Applying LDAP authentication test fixes. [#13310](https://github.com/ClickHouse/ClickHouse/pull/13310) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fix timeout error during server restart in the stress test. [#13321](https://github.com/ClickHouse/ClickHouse/pull/13321) ([alesapin](https://github.com/alesapin)). +* - Added testing for RBAC functionality of INSERT privilege in TestFlows. - Expanded tables on which SELECT is being tested. - Added Requirements to match new table engine tests. [#13340](https://github.com/ClickHouse/ClickHouse/pull/13340) ([MyroTk](https://github.com/MyroTk)). +* Remove some of recursive submodules. See [#13378](https://github.com/ClickHouse/ClickHouse/issues/13378). [#13379](https://github.com/ClickHouse/ClickHouse/pull/13379) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ensure that all the submodules are from proper URLs. Continuation of [#13379](https://github.com/ClickHouse/ClickHouse/issues/13379). This fixes [#13378](https://github.com/ClickHouse/ClickHouse/issues/13378). [#13397](https://github.com/ClickHouse/ClickHouse/pull/13397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Other +* Avoid re-loading completion from the history file after each query (to avoid history overlaps with other client sessions). [#13086](https://github.com/ClickHouse/ClickHouse/pull/13086) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump numpy from 1.18.5 to 1.19.1 in /docs/tools'. 
[#12655](https://github.com/ClickHouse/ClickHouse/pull/12655) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump urllib3 from 1.25.9 to 1.25.10 in /docs/tools'. [#12703](https://github.com/ClickHouse/ClickHouse/pull/12703) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Revert "Refactor joinGet and implement multi-key lookup."'. [#12708](https://github.com/ClickHouse/ClickHouse/pull/12708) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Abort on std::out_of_range in debug builds"'. [#12752](https://github.com/ClickHouse/ClickHouse/pull/12752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Bump protobuf from 3.12.2 to 3.12.4 in /docs/tools'. [#13102](https://github.com/ClickHouse/ClickHouse/pull/13102) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Merge [#12574](https://github.com/ClickHouse/ClickHouse/issues/12574)'. [#13158](https://github.com/ClickHouse/ClickHouse/pull/13158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer…"'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.7.2.30-stable.md b/docs/changelogs/v20.7.2.30-stable.md new file mode 100644 index 00000000000..a79affdf267 --- /dev/null +++ b/docs/changelogs/v20.7.2.30-stable.md @@ -0,0 +1,41 @@ +### ClickHouse release v20.7.2.30-stable FIXME as compared to v20.7.1.4310-prestable + +#### Performance Improvement +* Backported in [#14188](https://github.com/ClickHouse/ClickHouse/issues/14188): Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)). 
+ +#### Improvement +* Backported in [#13918](https://github.com/ClickHouse/ClickHouse/issues/13918): Fix data race in `lgamma` function. This race was caught only in `tsan`, no side effects actually happened. [#13842](https://github.com/ClickHouse/ClickHouse/pull/13842) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13951](https://github.com/ClickHouse/ClickHouse/issues/13951): Fix wrong error for long queries. It was possible to get syntax error other than `Max query size exceeded` for correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#14088](https://github.com/ClickHouse/ClickHouse/issues/14088): Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)). +* Backported in [#13559](https://github.com/ClickHouse/ClickHouse/issues/13559): Fix access to redis dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13509](https://github.com/ClickHouse/ClickHouse/issues/13509): Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#13607](https://github.com/ClickHouse/ClickHouse/issues/13607): Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504).
[#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13525](https://github.com/ClickHouse/ClickHouse/issues/13525): Fix possible race in `StorageMemory`. https://clickhouse-test-reports.s3.yandex.net/0/9cac8a7244063d2092ad25d45502611e18d3749c/stress_test_(thread)/stderr.log Have no idea how to write a test. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13536](https://github.com/ClickHouse/ClickHouse/issues/13536): Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). [#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14015](https://github.com/ClickHouse/ClickHouse/issues/14015): Fix premature `ON CLUSTER` timeouts for queries that must be executed on a single replica. Fixes [#6704](https://github.com/ClickHouse/ClickHouse/issues/6704), Fixes [#7228](https://github.com/ClickHouse/ClickHouse/issues/7228), Fixes [#13361](https://github.com/ClickHouse/ClickHouse/issues/13361), Fixes [#11884](https://github.com/ClickHouse/ClickHouse/issues/11884). [#13450](https://github.com/ClickHouse/ClickHouse/pull/13450) ([alesapin](https://github.com/alesapin)). +* Backported in [#13551](https://github.com/ClickHouse/ClickHouse/issues/13551): Fix secondary indices corruption in compact parts. [#13538](https://github.com/ClickHouse/ClickHouse/pull/13538) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#13998](https://github.com/ClickHouse/ClickHouse/issues/13998): Fixed the behaviour when sometimes cache-dictionary returned default value instead of present value from source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Backported in [#13664](https://github.com/ClickHouse/ClickHouse/issues/13664): Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#13704](https://github.com/ClickHouse/ClickHouse/issues/13704): Fix typo in error message about `The value of 'number_of_free_entries_in_pool_to_lower_max_size_of_merge' setting`. [#13678](https://github.com/ClickHouse/ClickHouse/pull/13678) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13718](https://github.com/ClickHouse/ClickHouse/issues/13718): Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#13702](https://github.com/ClickHouse/ClickHouse/issues/13702): Do not optimize any(arrayJoin()) -> arrayJoin() under optimize_move_functions_out_of_any. [#13681](https://github.com/ClickHouse/ClickHouse/pull/13681) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14107](https://github.com/ClickHouse/ClickHouse/issues/14107): Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13734](https://github.com/ClickHouse/ClickHouse/issues/13734): Fix incorrect message in `clickhouse-server.init` while checking user and group. [#13711](https://github.com/ClickHouse/ClickHouse/pull/13711) ([ylchou](https://github.com/ylchou)). 
+* Backported in [#13773](https://github.com/ClickHouse/ClickHouse/issues/13773): Fix logging Settings.Names/Values when log_queries_min_type > QUERY_START. [#13737](https://github.com/ClickHouse/ClickHouse/pull/13737) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13771](https://github.com/ClickHouse/ClickHouse/issues/13771): Fix race condition between DETACH and background merges. Parts may revive after detach. This is continuation of [#8602](https://github.com/ClickHouse/ClickHouse/issues/8602) that did not fix the issue but introduced a test that started to fail in very rare cases, demonstrating the issue. [#13746](https://github.com/ClickHouse/ClickHouse/pull/13746) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13787](https://github.com/ClickHouse/ClickHouse/issues/13787): Add range check for h3KRing function. This fixes [#13633](https://github.com/ClickHouse/ClickHouse/issues/13633). [#13752](https://github.com/ClickHouse/ClickHouse/pull/13752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13886](https://github.com/ClickHouse/ClickHouse/issues/13886): Fixed `Directory not empty` error when concurrently executing `DROP DATABASE` and `CREATE TABLE`. [#13756](https://github.com/ClickHouse/ClickHouse/pull/13756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#13800](https://github.com/ClickHouse/ClickHouse/issues/13800): Fix step overflow in range(). [#13790](https://github.com/ClickHouse/ClickHouse/pull/13790) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13813](https://github.com/ClickHouse/ClickHouse/issues/13813): Fix reading from MergeTree table with INDEX of type SET fails when comparing against NULL. This fixes [#13686](https://github.com/ClickHouse/ClickHouse/issues/13686). [#13793](https://github.com/ClickHouse/ClickHouse/pull/13793) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#13834](https://github.com/ClickHouse/ClickHouse/issues/13834): Fix topK/topKWeighted merge (with non-default parameters). [#13817](https://github.com/ClickHouse/ClickHouse/pull/13817) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#13904](https://github.com/ClickHouse/ClickHouse/issues/13904): Fix incorrect sorting for `FixedString` columns. Fixes [#13182](https://github.com/ClickHouse/ClickHouse/issues/13182). [#13887](https://github.com/ClickHouse/ClickHouse/pull/13887) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#13977](https://github.com/ClickHouse/ClickHouse/issues/13977): Fixed potential deadlock when renaming `Distributed` table. [#13922](https://github.com/ClickHouse/ClickHouse/pull/13922) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14031](https://github.com/ClickHouse/ClickHouse/issues/14031): Fix wrong results in select queries with `DISTINCT` keyword in case `optimize_duplicate_order_by_and_distinct` setting is enabled. [#13925](https://github.com/ClickHouse/ClickHouse/pull/13925) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#14078](https://github.com/ClickHouse/ClickHouse/issues/14078): Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14184](https://github.com/ClickHouse/ClickHouse/issues/14184): When waiting for a dictionary update to complete, use the timeout specified by `query_wait_timeout_milliseconds` setting instead of a hard-coded value. [#14105](https://github.com/ClickHouse/ClickHouse/pull/14105) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14169](https://github.com/ClickHouse/ClickHouse/issues/14169): Fix creation of tables with named tuples. This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). 
[#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14245](https://github.com/ClickHouse/ClickHouse/issues/14245): Fixed incorrect sorting order if LowCardinality column. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14250](https://github.com/ClickHouse/ClickHouse/issues/14250): Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277 . [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)). + diff --git a/docs/changelogs/v20.7.3.7-stable.md b/docs/changelogs/v20.7.3.7-stable.md new file mode 100644 index 00000000000..ae888bb5506 --- /dev/null +++ b/docs/changelogs/v20.7.3.7-stable.md @@ -0,0 +1,25 @@ +### ClickHouse release v20.7.3.7-stable FIXME as compared to v20.7.2.30-stable + +#### Improvement +* Backported in [#14361](https://github.com/ClickHouse/ClickHouse/issues/14361): Added Redis requirepass authorization. [#13688](https://github.com/ClickHouse/ClickHouse/pull/13688) ([Ivan Torgashov](https://github.com/it1804)). +* Backported in [#14875](https://github.com/ClickHouse/ClickHouse/issues/14875): Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). + +#### Bug Fix +* Backported in [#14080](https://github.com/ClickHouse/ClickHouse/issues/14080): Fixes /replicas_status endpoint response status code when verbose=1. [#13722](https://github.com/ClickHouse/ClickHouse/pull/13722) ([javi santana](https://github.com/javisantana)). +* Backported in [#14388](https://github.com/ClickHouse/ClickHouse/issues/14388): Fix GRANT ALL statement when executed on a non-global level. 
[#13987](https://github.com/ClickHouse/ClickHouse/pull/13987) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#14137](https://github.com/ClickHouse/ClickHouse/issues/14137): Fix pointInPolygon with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)). +* Backported in [#14292](https://github.com/ClickHouse/ClickHouse/issues/14292): Fix crash when INSERT INTO Kafka engine table from an empty result set with a subquery. ... [#14203](https://github.com/ClickHouse/ClickHouse/pull/14203) ([Dongdong Yang](https://github.com/donge)). +* Backported in [#14307](https://github.com/ClickHouse/ClickHouse/issues/14307): Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861). [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#14342](https://github.com/ClickHouse/ClickHouse/issues/14342): Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)). +* Backported in [#14364](https://github.com/ClickHouse/ClickHouse/issues/14364): Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent a rare possible query hang. [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#14507](https://github.com/ClickHouse/ClickHouse/issues/14507): Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent a rare possible query hang. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334).
[#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#14486](https://github.com/ClickHouse/ClickHouse/issues/14486): Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)). +* Backported in [#14482](https://github.com/ClickHouse/ClickHouse/issues/14482): Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14601](https://github.com/ClickHouse/ClickHouse/issues/14601): Fix rare segfaults in functions with combinator -Resample, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#14728](https://github.com/ClickHouse/ClickHouse/issues/14728): Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#14665](https://github.com/ClickHouse/ClickHouse/issues/14665): Fix wrong Decimal multiplication result caused by wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#14806](https://github.com/ClickHouse/ClickHouse/issues/14806): Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault.
Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Backported in [#14720](https://github.com/ClickHouse/ClickHouse/issues/14720): Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14911](https://github.com/ClickHouse/ClickHouse/issues/14911): Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14942](https://github.com/ClickHouse/ClickHouse/issues/14942): Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.7.4.11-stable.md b/docs/changelogs/v20.7.4.11-stable.md new file mode 100644 index 00000000000..0c8ad1e1639 --- /dev/null +++ b/docs/changelogs/v20.7.4.11-stable.md @@ -0,0 +1,36 @@ +### ClickHouse release v20.7.4.11-stable FIXME as compared to v20.7.3.7-stable + +#### Improvement +* Backported in [#15566](https://github.com/ClickHouse/ClickHouse/issues/15566): Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix +* Backported in [#15019](https://github.com/ClickHouse/ClickHouse/issues/15019): Fixed the incorrect sorting order of `Nullable` column. 
This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14824](https://github.com/ClickHouse/ClickHouse/issues/14824): Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15149](https://github.com/ClickHouse/ClickHouse/issues/15149): Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#15252](https://github.com/ClickHouse/ClickHouse/issues/15252): Fixed segfault in CacheDictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14988](https://github.com/ClickHouse/ClickHouse/issues/14988): Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#14970](https://github.com/ClickHouse/ClickHouse/issues/14970): Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). 
[#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Backported in [#15054](https://github.com/ClickHouse/ClickHouse/issues/15054): Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)). +* Backported in [#15078](https://github.com/ClickHouse/ClickHouse/issues/15078): Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15103](https://github.com/ClickHouse/ClickHouse/issues/15103): Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15058](https://github.com/ClickHouse/ClickHouse/issues/15058): If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15051](https://github.com/ClickHouse/ClickHouse/issues/15051): We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908).
[#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15140](https://github.com/ClickHouse/ClickHouse/issues/15140): Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15221](https://github.com/ClickHouse/ClickHouse/issues/15221): Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Backported in [#15406](https://github.com/ClickHouse/ClickHouse/issues/15406): Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15488](https://github.com/ClickHouse/ClickHouse/issues/15488): Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15280](https://github.com/ClickHouse/ClickHouse/issues/15280): Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15415](https://github.com/ClickHouse/ClickHouse/issues/15415): Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hung forever.
[#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15337](https://github.com/ClickHouse/ClickHouse/issues/15337): Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Backported in [#15333](https://github.com/ClickHouse/ClickHouse/issues/15333): Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Backported in [#15444](https://github.com/ClickHouse/ClickHouse/issues/15444): Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Backported in [#15506](https://github.com/ClickHouse/ClickHouse/issues/15506): Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Backported in [#15617](https://github.com/ClickHouse/ClickHouse/issues/15617): Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Backported in [#15559](https://github.com/ClickHouse/ClickHouse/issues/15559): Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#15727](https://github.com/ClickHouse/ClickHouse/issues/15727): Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15653](https://github.com/ClickHouse/ClickHouse/issues/15653): Fix 'Database doesn't exist.' in queries with IN and Distributed table when there's no database on initiator. [#15538](https://github.com/ClickHouse/ClickHouse/pull/15538) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15632](https://github.com/ClickHouse/ClickHouse/issues/15632): Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#15584](https://github.com/ClickHouse/ClickHouse/issues/15584): Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15663](https://github.com/ClickHouse/ClickHouse/issues/15663): Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15716](https://github.com/ClickHouse/ClickHouse/issues/15716): Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). 
[#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v20.8.1.4513-prestable.md b/docs/changelogs/v20.8.1.4513-prestable.md new file mode 100644 index 00000000000..0f2385f916c --- /dev/null +++ b/docs/changelogs/v20.8.1.4513-prestable.md @@ -0,0 +1,133 @@ +### ClickHouse release v20.8.1.4513-prestable FIXME as compared to v20.7.1.4310-prestable + +#### Backward Incompatible Change +* Remove support for the `ODBCDriver` input/output format. This was a deprecated format once used for communication with the ClickHouse ODBC driver, now long superseded by the `ODBCDriver2` format. Resolves [#13629](https://github.com/ClickHouse/ClickHouse/issues/13629). [#13847](https://github.com/ClickHouse/ClickHouse/pull/13847) ([hexiaoting](https://github.com/hexiaoting)). +* Extend `parallel_distributed_insert_select` setting, adding an option to run `INSERT` into local table. The setting changes type from `Bool` to `UInt64`, so the values `false` and `true` are no longer supported. If you have these values in server configuration, the server will not start. Please replace them with `0` and `1`, respectively. [#14060](https://github.com/ClickHouse/ClickHouse/pull/14060) ([Azat Khuzhin](https://github.com/azat)). +* Now `OPTIMIZE FINAL` query doesn't recalculate TTL for parts that were added before TTL was created. Use `ALTER TABLE ... MATERIALIZE TTL` once to calculate them, after that `OPTIMIZE FINAL` will evaluate TTL's properly. This behavior never worked for replicated tables. [#14220](https://github.com/ClickHouse/ClickHouse/pull/14220) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Support `MaterializeMySQL` database engine. Implements [#4006](https://github.com/ClickHouse/ClickHouse/issues/4006). [#10851](https://github.com/ClickHouse/ClickHouse/pull/10851) ([Winter Zhang](https://github.com/zhang2014)). 
+* Support Kerberos authentication in Kafka, using `krb5` and `cyrus-sasl` libraries. [#12771](https://github.com/ClickHouse/ClickHouse/pull/12771) ([Ilya Golshtein](https://github.com/ilejn)). +* Add types `Int128`, `Int256`, `UInt256` and related functions for them. Extend Decimals with Decimal256 (precision up to 76 digits). New types are under the setting `allow_experimental_bigint_types`. [#13097](https://github.com/ClickHouse/ClickHouse/pull/13097) ([Artem Zuikov](https://github.com/4ertus2)). +* Function `position` now supports optional `start_pos` argument. [#13237](https://github.com/ClickHouse/ClickHouse/pull/13237) ([Vladimir C](https://github.com/vdimir)). +* Add `ALTER SAMPLE BY` statement that allows to change table sample clause. [#13280](https://github.com/ClickHouse/ClickHouse/pull/13280) ([Amos Bird](https://github.com/amosbird)). +* Add new optional section to the main config. [#13425](https://github.com/ClickHouse/ClickHouse/pull/13425) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add function `normalizeQuery` that replaces literals, sequences of literals and complex aliases with placeholders. Add function `normalizedQueryHash` that returns identical 64bit hash values for similar queries. It helps to analyze query log. This closes [#11271](https://github.com/ClickHouse/ClickHouse/issues/11271). [#13816](https://github.com/ClickHouse/ClickHouse/pull/13816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `quantileExactLow` and `quantileExactHigh` implementations with respective aliases for `medianExactLow` and `medianExactHigh`. [#13818](https://github.com/ClickHouse/ClickHouse/pull/13818) ([Bharat Nallan](https://github.com/bharatnc)). +* Add function `defaultValueOfTypeName` that returns the default value for a given type. [#13877](https://github.com/ClickHouse/ClickHouse/pull/13877) ([hcz](https://github.com/hczhcz)). +* Added `date_trunc` function that truncates a date/time value to a specified date/time part. 
[#13888](https://github.com/ClickHouse/ClickHouse/pull/13888) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Add the ability to specify `Default` compression codec for columns that correspond to settings specified in `config.xml`. Implements: [#9074](https://github.com/ClickHouse/ClickHouse/issues/9074). [#14049](https://github.com/ClickHouse/ClickHouse/pull/14049) ([alesapin](https://github.com/alesapin)). +* Add setting `min_index_granularity_bytes` that protects against accidentally creating a table with very low `index_granularity_bytes` setting. [#14139](https://github.com/ClickHouse/ClickHouse/pull/14139) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `countDigits(x)` function that count number of decimal digits in integer or decimal column. Add `isDecimalOverflow(d, [p])` function that checks if the value in Decimal column is out of its (or specified) precision. [#14151](https://github.com/ClickHouse/ClickHouse/pull/14151) ([Artem Zuikov](https://github.com/4ertus2)). + +#### Performance Improvement +* When performing trivial `INSERT SELECT` queries, automatically set `max_threads` to 1 or `max_insert_threads`, and set `max_block_size` to `min_insert_block_size_rows`. Related to [#5907](https://github.com/ClickHouse/ClickHouse/issues/5907). [#12195](https://github.com/ClickHouse/ClickHouse/pull/12195) ([flynn](https://github.com/ucasfl)). +* Optimize `has()`, `indexOf()` and `countEqual()` functions for `Array(LowCardinality(T))` and constant right arguments. [#12550](https://github.com/ClickHouse/ClickHouse/pull/12550) ([Mike Kot](https://github.com/myrrc)). +* Slightly improve performance of aggregation by UInt8/UInt16 keys. [#13099](https://github.com/ClickHouse/ClickHouse/pull/13099) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail fast if `max_rows_to_read` limit is exceeded on parts scan. 
The motivation behind this change is to skip ranges scan for all selected parts if it is clear that `max_rows_to_read` is already exceeded. The change is quite noticeable for queries over a big number of parts. [#13677](https://github.com/ClickHouse/ClickHouse/pull/13677) ([Roman Khavronenko](https://github.com/hagen1778)). +* Enable parallel INSERTs for table engines `Null`, `Memory`, `Distributed` and `Buffer`. [#14120](https://github.com/ClickHouse/ClickHouse/pull/14120) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* Added cache layer for DiskS3 (cache to local disk mark and index files). [#13076](https://github.com/ClickHouse/ClickHouse/pull/13076) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix assert when decimal has too large negative exponent. Fixes [#13188](https://github.com/ClickHouse/ClickHouse/issues/13188). [#13228](https://github.com/ClickHouse/ClickHouse/pull/13228) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQueryTimeMicroseconds to system.events. [#13336](https://github.com/ClickHouse/ClickHouse/pull/13336) ([ianton-ru](https://github.com/ianton-ru)). +* Proper remote host checking in S3 redirects (security-related thing). [#13404](https://github.com/ClickHouse/ClickHouse/pull/13404) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Enable mixed granularity parts by default. [#13449](https://github.com/ClickHouse/ClickHouse/pull/13449) ([alesapin](https://github.com/alesapin)). +* Move parts from DiskLocal to DiskS3 in parallel. [#13459](https://github.com/ClickHouse/ClickHouse/pull/13459) ([Pavel Kovalenko](https://github.com/Jokser)). +* Support compound identifiers for custom settings.
[#13496](https://github.com/ClickHouse/ClickHouse/pull/13496) ([Vitaly Baranov](https://github.com/vitlibar)). +* Provide monotonicity for `toDate/toDateTime` functions in more cases. Now the input arguments are saturated more naturally and provide better monotonicity. [#13497](https://github.com/ClickHouse/ClickHouse/pull/13497) ([Amos Bird](https://github.com/amosbird)). +* In previous versions `lcm` function may produce assertion violation in debug build if called with specifically crafted arguments. This fixes [#13368](https://github.com/ClickHouse/ClickHouse/issues/13368). [#13510](https://github.com/ClickHouse/ClickHouse/pull/13510) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add MergeTree Write-Ahead-Log(WAL) dump tool. [#13640](https://github.com/ClickHouse/ClickHouse/pull/13640) ([BohuTANG](https://github.com/BohuTANG)). +* Avoid too slow queries when arrays are manipulated as fields. Throw exception instead. [#13753](https://github.com/ClickHouse/ClickHouse/pull/13753) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Add [GTID-Based Replication](https://dev.mysql.com/doc/refman/5.7/en/replication-gtids-concepts.html), it works even when replication topology changes, and supported/preferred in MySQL 5.6/5.7/8.0 2. Add BIT/SET field type support 3. Fix up varchar type meta length bug. [#13820](https://github.com/ClickHouse/ClickHouse/pull/13820) ([BohuTANG](https://github.com/BohuTANG)). +* Fix data race in `lgamma` function. This race was caught only in `tsan`, no side effects actually happened. [#13842](https://github.com/ClickHouse/ClickHouse/pull/13842) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Function `arrayCompact` will compare NaNs bitwise if the type of array elements is Float32/Float64. In previous versions NaNs were always not equal if the type of array elements is Float32/Float64 and were always equal if the type is more complex, like Nullable(Float64).
This closes [#13857](https://github.com/ClickHouse/ClickHouse/issues/13857). [#13868](https://github.com/ClickHouse/ClickHouse/pull/13868) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better error message for null value of TabSeparatedRow format. [#13906](https://github.com/ClickHouse/ClickHouse/pull/13906) ([jiang tao](https://github.com/tomjiang1987)). +* Fix wrong error for long queries. It was possible to get syntax error other than `Max query size exceeded` for correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Corrected an error in AvroConfluent format that caused the Kafka table engine to stop processing messages when an abnormally small, malformed, message was received. [#13941](https://github.com/ClickHouse/ClickHouse/pull/13941) ([Gervasio Varela](https://github.com/gervarela)). +* Increase limit in -Resample combinator to 1M. [#13947](https://github.com/ClickHouse/ClickHouse/pull/13947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Conditional aggregate functions (for example: `avgIf`, `sumIf`, `maxIf`) should return `NULL` when no rows match and nullable arguments are used. [#13964](https://github.com/ClickHouse/ClickHouse/pull/13964) ([Winter Zhang](https://github.com/zhang2014)). +* Slightly better performance of Memory table if it was constructed from a huge number of very small blocks (that's unlikely). Author of the idea: [Mark Papadakis](https://github.com/markpapadakis). Closes [#14043](https://github.com/ClickHouse/ClickHouse/issues/14043). [#14056](https://github.com/ClickHouse/ClickHouse/pull/14056) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `toDecimal256()` and attempt to fix potential perf regression in Decimal to Float conversion. [#14110](https://github.com/ClickHouse/ClickHouse/pull/14110) ([Artem Zuikov](https://github.com/4ertus2)).
+* Now it's possible to `ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'zk://:/path-in-zookeeper'`. It's useful for shipping data to new clusters. [#14155](https://github.com/ClickHouse/ClickHouse/pull/14155) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix +* subquery hash values are not enough to distinguish. [#8333](https://github.com/ClickHouse/ClickHouse/issues/8333). [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)). +* Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)). +* Fix access to redis dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)). +* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible race in `StorageMemory`. https://clickhouse-test-reports.s3.yandex.net/0/9cac8a7244063d2092ad25d45502611e18d3749c/stress_test_(thread)/stderr.log Have no idea how to write a test. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). 
[#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix premature `ON CLUSTER` timeouts for queries that must be executed on a single replica. Fixes [#6704](https://github.com/ClickHouse/ClickHouse/issues/6704), Fixes [#7228](https://github.com/ClickHouse/ClickHouse/issues/7228), Fixes [#13361](https://github.com/ClickHouse/ClickHouse/issues/13361), Fixes [#11884](https://github.com/ClickHouse/ClickHouse/issues/11884). [#13450](https://github.com/ClickHouse/ClickHouse/pull/13450) ([alesapin](https://github.com/alesapin)). +* Fix secondary indices corruption in compact parts. [#13538](https://github.com/ClickHouse/ClickHouse/pull/13538) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the behaviour when sometimes cache-dictionary returned default value instead of present value from source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix typo in error message about `The value of 'number_of_free_entries_in_pool_to_lower_max_size_of_merge' setting`. [#13678](https://github.com/ClickHouse/ClickHouse/pull/13678) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Do not optimize any(arrayJoin()) -> arrayJoin() under optimize_move_functions_out_of_any. [#13681](https://github.com/ClickHouse/ClickHouse/pull/13681) ([Azat Khuzhin](https://github.com/azat)). +* Fix visible data clobbering by progress bar in client in interactive mode. 
This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect message in `clickhouse-server.init` while checking user and group. [#13711](https://github.com/ClickHouse/ClickHouse/pull/13711) ([ylchou](https://github.com/ylchou)). +* Fixes /replicas_status endpoint response status code when verbose=1. [#13722](https://github.com/ClickHouse/ClickHouse/pull/13722) ([javi santana](https://github.com/javisantana)). +* Fix logging Settings.Names/Values when log_queries_min_type > QUERY_START. [#13737](https://github.com/ClickHouse/ClickHouse/pull/13737) ([Azat Khuzhin](https://github.com/azat)). +* Fix race condition between DETACH and background merges. Parts may revive after detach. This is continuation of [#8602](https://github.com/ClickHouse/ClickHouse/issues/8602) that did not fix the issue but introduced a test that started to fail in very rare cases, demonstrating the issue. [#13746](https://github.com/ClickHouse/ClickHouse/pull/13746) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add range check for h3KRing function. This fixes [#13633](https://github.com/ClickHouse/ClickHouse/issues/13633). [#13752](https://github.com/ClickHouse/ClickHouse/pull/13752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `Directory not empty` error when concurrently executing `DROP DATABASE` and `CREATE TABLE`. [#13756](https://github.com/ClickHouse/ClickHouse/pull/13756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix step overflow in range(). [#13790](https://github.com/ClickHouse/ClickHouse/pull/13790) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix reading from MergeTree table with INDEX of type SET fails when comparing against NULL. This fixes [#13686](https://github.com/ClickHouse/ClickHouse/issues/13686). [#13793](https://github.com/ClickHouse/ClickHouse/pull/13793) ([Amos Bird](https://github.com/amosbird)). +* Fix topK/topKWeighted merge (with non-default parameters). [#13817](https://github.com/ClickHouse/ClickHouse/pull/13817) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect sorting for `FixedString` columns. Fixes [#13182](https://github.com/ClickHouse/ClickHouse/issues/13182). [#13887](https://github.com/ClickHouse/ClickHouse/pull/13887) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed potential deadlock when renaming `Distributed` table. [#13922](https://github.com/ClickHouse/ClickHouse/pull/13922) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix wrong results in select queries with `DISTINCT` keyword in case `optimize_duplicate_order_by_and_distinct` setting is enabled. [#13925](https://github.com/ClickHouse/ClickHouse/pull/13925) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix parser to reject create table as table function with engine. [#13940](https://github.com/ClickHouse/ClickHouse/pull/13940) ([hcz](https://github.com/hczhcz)). +* Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix pointInPolygon with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)). +* Fix DistributedFilesToInsert metric (zeroed when it should not). [#14095](https://github.com/ClickHouse/ClickHouse/pull/14095) ([Azat Khuzhin](https://github.com/azat)). +* When waiting for a dictionary update to complete, use the timeout specified by `query_wait_timeout_milliseconds` setting instead of a hard-coded value. 
[#14105](https://github.com/ClickHouse/ClickHouse/pull/14105) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix formatting of minimal negative decimal numbers. This fixes [#14111](https://github.com/ClickHouse/ClickHouse/issues/14111). [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix creation of tables with named tuples. This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). [#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash when INSERT INTO Kafka engine table from an empty result set with a subquery. ... [#14203](https://github.com/ClickHouse/ClickHouse/pull/14203) ([Dongdong Yang](https://github.com/donge)). +* Fixed incorrect sorting order of LowCardinality column. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277 . [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)). +* fixes [#14231](https://github.com/ClickHouse/ClickHouse/issues/14231) fix wrong lexer in MaterializeMySQL database engine dump stage. [#14232](https://github.com/ClickHouse/ClickHouse/pull/14232) ([Winter Zhang](https://github.com/zhang2014)). +* Fix handling of empty transactions in `MaterializeMySQL` database engine. This fixes [#14235](https://github.com/ClickHouse/ClickHouse/issues/14235). [#14253](https://github.com/ClickHouse/ClickHouse/pull/14253) ([BohuTANG](https://github.com/BohuTANG)). +* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861).
[#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Move Dockerfiles from integration tests to `docker/test` directory. docker_compose files are available in `runner` docker container. Docker images are built in CI and not in integration tests. [#13448](https://github.com/ClickHouse/ClickHouse/pull/13448) ([Ilya Yatsishin](https://github.com/qoega)). +* Skip PR's from robot-clickhouse. [#13489](https://github.com/ClickHouse/ClickHouse/pull/13489) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix typos in code with codespell. [#13511](https://github.com/ClickHouse/ClickHouse/pull/13511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable check for typos in code with `codespell`. [#13513](https://github.com/ClickHouse/ClickHouse/pull/13513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ensure that there is no copy-pasted GPL code. [#13514](https://github.com/ClickHouse/ClickHouse/pull/13514) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to run `clickhouse` binary without configuration. [#13515](https://github.com/ClickHouse/ClickHouse/pull/13515) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added `clickhouse install` script, that is useful if you only have a single binary. [#13528](https://github.com/ClickHouse/ClickHouse/pull/13528) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the remaining shellcheck notices. A preparation to enable Shellcheck. [#13529](https://github.com/ClickHouse/ClickHouse/pull/13529) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable Shellcheck in CI as a linter of .sh tests. This closes [#13168](https://github.com/ClickHouse/ClickHouse/issues/13168). [#13530](https://github.com/ClickHouse/ClickHouse/pull/13530) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Make sure [#10977](https://github.com/ClickHouse/ClickHouse/issues/10977) is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)). +* Increasing health-check timeouts for ClickHouse nodes and adding support to dump docker-compose logs if unhealthy containers are found. [#13612](https://github.com/ClickHouse/ClickHouse/pull/13612) ([vzakaznikov](https://github.com/vzakaznikov)). +* Build ClickHouse with the most fresh tzdata from package repository. [#13623](https://github.com/ClickHouse/ClickHouse/pull/13623) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Removed `-DENABLE_CURL_CLIENT` for `contrib/aws`. [#13628](https://github.com/ClickHouse/ClickHouse/pull/13628) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Expose version of embedded tzdata via TZDATA_VERSION in system.build_options. [#13648](https://github.com/ClickHouse/ClickHouse/pull/13648) ([filimonov](https://github.com/filimonov)). +* Updating LDAP user authentication suite to check that it works with RBAC. [#13656](https://github.com/ClickHouse/ClickHouse/pull/13656) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add a CMake option to fail configuration instead of auto-reconfiguration, enabled by default. [#13687](https://github.com/ClickHouse/ClickHouse/pull/13687) ([Konstantin](https://github.com/podshumok)). +* Fix link error in shared build. [#13700](https://github.com/ClickHouse/ClickHouse/pull/13700) ([Amos Bird](https://github.com/amosbird)). +* Fix cassandra build on Mac OS. [#13708](https://github.com/ClickHouse/ClickHouse/pull/13708) ([Ilya Yatsishin](https://github.com/qoega)). +* Added docker image for style check. Added style check that all docker and docker compose files are located in docker directory. [#13724](https://github.com/ClickHouse/ClickHouse/pull/13724) ([Ilya Yatsishin](https://github.com/qoega)). +* ZooKeeper cannot work reliably in unit tests in CI infrastructure.
Using unit tests for ZooKeeper interaction with real ZooKeeper is bad idea from the start (unit tests are not supposed to verify complex distributed systems). We already using integration tests for this purpose and they are better suited. [#13745](https://github.com/ClickHouse/ClickHouse/pull/13745) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Testflows LDAP module: adding missing certificates and dhparam.pem for openldap4. [#13780](https://github.com/ClickHouse/ClickHouse/pull/13780) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enabled text-log in stress test to find more bugs. [#13855](https://github.com/ClickHouse/ClickHouse/pull/13855) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* * Adding retry logic when bringing up docker-compose cluster * Increasing COMPOSE_HTTP_TIMEOUT. [#14112](https://github.com/ClickHouse/ClickHouse/pull/14112) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add the ability to write js-style comments in skip_list.json. [#14159](https://github.com/ClickHouse/ClickHouse/pull/14159) ([alesapin](https://github.com/alesapin)). +* Switch tests docker images to use test-base parent. [#14167](https://github.com/ClickHouse/ClickHouse/pull/14167) ([Ilya Yatsishin](https://github.com/qoega)). +* Actually there are no symlinks there, so `-type f` is enough ``` ~/workspace/ClickHouse/contrib/cctz/testdata/zoneinfo$ find . -type l -ls | wc -l 0 ``` Closes [#14209](https://github.com/ClickHouse/ClickHouse/issues/14209). [#14215](https://github.com/ClickHouse/ClickHouse/pull/14215) ([filimonov](https://github.com/filimonov)). + +#### Other +* Fix readline so it dumps history to file now. [#13600](https://github.com/ClickHouse/ClickHouse/pull/13600) ([Amos Bird](https://github.com/amosbird)). +* Create `system` database with `Atomic` engine by default. [#13680](https://github.com/ClickHouse/ClickHouse/pull/13680) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump pymdown-extensions from 7.1 to 8.0 in /docs/tools'. [#13645](https://github.com/ClickHouse/ClickHouse/pull/13645) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump protobuf from 3.12.4 to 3.13.0 in /docs/tools'. [#13824](https://github.com/ClickHouse/ClickHouse/pull/13824) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.8.10.13-lts.md b/docs/changelogs/v20.8.10.13-lts.md new file mode 100644 index 00000000000..b6c69e115a9 --- /dev/null +++ b/docs/changelogs/v20.8.10.13-lts.md @@ -0,0 +1,6 @@ +### ClickHouse release v20.8.10.13-lts FIXME as compared to v20.8.9.6-lts + +#### Bug Fix +* Backported in [#17967](https://github.com/ClickHouse/ClickHouse/issues/17967): Fix [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). When clickhouse-copier handle non-partitioned table, throws segfault error. [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). +* Backported in [#18155](https://github.com/ClickHouse/ClickHouse/issues/18155): fix incorrect initialize `max_compress_block_size` of MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasfl)). + diff --git a/docs/changelogs/v20.8.11.17-lts.md b/docs/changelogs/v20.8.11.17-lts.md new file mode 100644 index 00000000000..8416df896d6 --- /dev/null +++ b/docs/changelogs/v20.8.11.17-lts.md @@ -0,0 +1,5 @@ +### ClickHouse release v20.8.11.17-lts FIXME as compared to v20.8.10.13-lts + +#### Bug Fix +* Backported in [#18442](https://github.com/ClickHouse/ClickHouse/issues/18442): Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). 
[#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v20.8.12.2-lts.md b/docs/changelogs/v20.8.12.2-lts.md new file mode 100644 index 00000000000..855ef016031 --- /dev/null +++ b/docs/changelogs/v20.8.12.2-lts.md @@ -0,0 +1,5 @@ +### ClickHouse release v20.8.12.2-lts FIXME as compared to v20.8.11.17-lts + +#### Bug Fix +* Backported in [#18514](https://github.com/ClickHouse/ClickHouse/issues/18514): Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). + diff --git a/docs/changelogs/v20.8.13.15-lts.md b/docs/changelogs/v20.8.13.15-lts.md new file mode 100644 index 00000000000..7635f641d92 --- /dev/null +++ b/docs/changelogs/v20.8.13.15-lts.md @@ -0,0 +1,42 @@ +### ClickHouse release v20.8.13.15-lts FIXME as compared to v20.8.12.2-lts + +#### Backward Incompatible Change +* Backported in [#20418](https://github.com/ClickHouse/ClickHouse/issues/20418): Now it's not allowed to create MergeTree tables in old syntax with table TTL because it's just ignored. Attach of old tables is still possible. [#20282](https://github.com/ClickHouse/ClickHouse/pull/20282) ([alesapin](https://github.com/alesapin)). + +#### Performance Improvement +* Backported in [#20926](https://github.com/ClickHouse/ClickHouse/issues/20926): Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#20953](https://github.com/ClickHouse/ClickHouse/issues/20953): Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. 
Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#20951](https://github.com/ClickHouse/ClickHouse/issues/20951): TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#20918](https://github.com/ClickHouse/ClickHouse/issues/20918): Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#20924](https://github.com/ClickHouse/ClickHouse/issues/20924): Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Backported in [#20915](https://github.com/ClickHouse/ClickHouse/issues/20915): Fix empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20922](https://github.com/ClickHouse/ClickHouse/issues/20922): Fixed segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#20920](https://github.com/ClickHouse/ClickHouse/issues/20920): Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#20919](https://github.com/ClickHouse/ClickHouse/issues/20919): Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#20917](https://github.com/ClickHouse/ClickHouse/issues/20917): - Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; - Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)). +* Backported in [#20914](https://github.com/ClickHouse/ClickHouse/issues/20914): Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#20913](https://github.com/ClickHouse/ClickHouse/issues/20913): Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Backported in [#20912](https://github.com/ClickHouse/ClickHouse/issues/20912): Fix bug which may lead to `ALTER` queries hung after corresponding mutation kill. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Backported in [#20910](https://github.com/ClickHouse/ClickHouse/issues/20910): Fix previous bug when date overflow with different values. Strict Date value limit to "2106-02-07", cast date > "2106-02-07" to value 0. 
[#18565](https://github.com/ClickHouse/ClickHouse/pull/18565) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#20907](https://github.com/ClickHouse/ClickHouse/issues/20907): Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). +* Backported in [#20905](https://github.com/ClickHouse/ClickHouse/issues/20905): Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* Backported in [#20904](https://github.com/ClickHouse/ClickHouse/issues/20904): Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20901](https://github.com/ClickHouse/ClickHouse/issues/20901): Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#20900](https://github.com/ClickHouse/ClickHouse/issues/20900): Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#20897](https://github.com/ClickHouse/ClickHouse/issues/20897): - Fix default value in join types with non-zero default (e.g. some Enums). 
Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#20898](https://github.com/ClickHouse/ClickHouse/issues/20898): Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20894](https://github.com/ClickHouse/ClickHouse/issues/20894): Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#20896](https://github.com/ClickHouse/ClickHouse/issues/20896): Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20893](https://github.com/ClickHouse/ClickHouse/issues/20893): Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#20890](https://github.com/ClickHouse/ClickHouse/issues/20890): Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). 
[#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#20525](https://github.com/ClickHouse/ClickHouse/issues/20525): Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Backported in [#20891](https://github.com/ClickHouse/ClickHouse/issues/20891): Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#20889](https://github.com/ClickHouse/ClickHouse/issues/20889): In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#20521](https://github.com/ClickHouse/ClickHouse/issues/20521): The `MongoDB` table engine now establishes connection only when it's going to read data. `ATTACH TABLE` won't try to connect anymore. [#20110](https://github.com/ClickHouse/ClickHouse/pull/20110) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#20533](https://github.com/ClickHouse/ClickHouse/issues/20533): Fixed the behavior when in case of broken JSON we tried to read the whole file into memory which leads to exception from the allocator. Fixes [#19719](https://github.com/ClickHouse/ClickHouse/issues/19719). [#20286](https://github.com/ClickHouse/ClickHouse/pull/20286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Backported in [#20414](https://github.com/ClickHouse/ClickHouse/issues/20414): Restrict to `DROP` or `RENAME` version column of `*CollapsingMergeTree` and `ReplacingMergeTree` table engines. [#20300](https://github.com/ClickHouse/ClickHouse/pull/20300) ([alesapin](https://github.com/alesapin)). +* Backported in [#20522](https://github.com/ClickHouse/ClickHouse/issues/20522): Avoid invalid dereference in RANGE_HASHED() dictionary. [#20345](https://github.com/ClickHouse/ClickHouse/pull/20345) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#20591](https://github.com/ClickHouse/ClickHouse/issues/20591): Fixed inconsistent behavior of dictionary in case of queries where we look for absent keys in dictionary. [#20578](https://github.com/ClickHouse/ClickHouse/pull/20578) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + diff --git a/docs/changelogs/v20.8.14.4-lts.md b/docs/changelogs/v20.8.14.4-lts.md new file mode 100644 index 00000000000..0ac5d3fa713 --- /dev/null +++ b/docs/changelogs/v20.8.14.4-lts.md @@ -0,0 +1,19 @@ +### ClickHouse release v20.8.14.4-lts FIXME as compared to v20.8.13.15-lts + +#### Improvement +* Backported in [#21235](https://github.com/ClickHouse/ClickHouse/issues/21235): The value of MYSQL_OPT_RECONNECT option can now be controlled by "opt_reconnect" parameter in the config section of mysql replica. [#19998](https://github.com/ClickHouse/ClickHouse/pull/19998) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#21203](https://github.com/ClickHouse/ClickHouse/issues/21203): When loading config for mysql source ClickHouse will now randomize the list of replicas with the same priority to ensure the round-robin logics of picking mysql endpoint. This closes [#20629](https://github.com/ClickHouse/ClickHouse/issues/20629). [#20632](https://github.com/ClickHouse/ClickHouse/pull/20632) ([Alexander Kazakov](https://github.com/Akazz)). 
+* Backported in [#21336](https://github.com/ClickHouse/ClickHouse/issues/21336): MySQL dictionary source will now retry unexpected connection failures (Lost connection to MySQL server during query) which sometimes happen on SSL/TLS connections. [#21237](https://github.com/ClickHouse/ClickHouse/pull/21237) ([Alexander Kazakov](https://github.com/Akazz)). + +#### Bug Fix +* Backported in [#20950](https://github.com/ClickHouse/ClickHouse/issues/20950): Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#20948](https://github.com/ClickHouse/ClickHouse/issues/20948): Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#20903](https://github.com/ClickHouse/ClickHouse/issues/20903): Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#20899](https://github.com/ClickHouse/ClickHouse/issues/20899): Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Backported in [#20892](https://github.com/ClickHouse/ClickHouse/issues/20892): Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#20888](https://github.com/ClickHouse/ClickHouse/issues/20888): Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Backported in [#21049](https://github.com/ClickHouse/ClickHouse/issues/21049): Fix usage of `-Distinct` combinator with `-State` combinator in aggregate functions. [#20866](https://github.com/ClickHouse/ClickHouse/pull/20866) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#21130](https://github.com/ClickHouse/ClickHouse/issues/21130): Fixed behaviour when `ALTER MODIFY COLUMN` created a mutation that would knowingly fail. [#21007](https://github.com/ClickHouse/ClickHouse/pull/21007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#21159](https://github.com/ClickHouse/ClickHouse/issues/21159): Make `input_format_null_as_default` take effect when types are nullable. This fixes [#21116](https://github.com/ClickHouse/ClickHouse/issues/21116). [#21121](https://github.com/ClickHouse/ClickHouse/pull/21121) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#21378](https://github.com/ClickHouse/ClickHouse/issues/21378): Fix error `Bad cast from type ... to DB::ColumnLowCardinality` while inserting into table with `LowCardinality` column from `Values` format. Fixes [#21140](https://github.com/ClickHouse/ClickHouse/issues/21140). [#21357](https://github.com/ClickHouse/ClickHouse/pull/21357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+ diff --git a/docs/changelogs/v20.8.15.11-lts.md b/docs/changelogs/v20.8.15.11-lts.md new file mode 100644 index 00000000000..dfdb0cf1ad2 --- /dev/null +++ b/docs/changelogs/v20.8.15.11-lts.md @@ -0,0 +1,17 @@ +### ClickHouse release v20.8.15.11-lts FIXME as compared to v20.8.14.4-lts + +#### Bug Fix +* Backported in [#21627](https://github.com/ClickHouse/ClickHouse/issues/21627): Fix max_distributed_connections (affects `prefer_localhost_replica=1` and `max_threads!=max_distributed_connections`). [#17848](https://github.com/ClickHouse/ClickHouse/pull/17848) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#21626](https://github.com/ClickHouse/ClickHouse/issues/21626): Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#21409](https://github.com/ClickHouse/ClickHouse/issues/21409): Now mutations allowed only for table engines that support them (MergeTree family, Memory, MaterializedView). Other engines will report a more clear error. Fixes [#21168](https://github.com/ClickHouse/ClickHouse/issues/21168). [#21183](https://github.com/ClickHouse/ClickHouse/pull/21183) ([alesapin](https://github.com/alesapin)). +* Backported in [#22190](https://github.com/ClickHouse/ClickHouse/issues/22190): Fixed race on SSL object inside SecureSocket in Poco. [#21456](https://github.com/ClickHouse/ClickHouse/pull/21456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#21547](https://github.com/ClickHouse/ClickHouse/issues/21547): Fix a deadlock in `ALTER DELETE` mutations for non replicated MergeTree table engines when the predicate contains the table itself. Fixes [#20558](https://github.com/ClickHouse/ClickHouse/issues/20558). [#21477](https://github.com/ClickHouse/ClickHouse/pull/21477) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#22284](https://github.com/ClickHouse/ClickHouse/issues/22284): Fix bug for ReplicatedMerge table engines when `ALTER MODIFY COLUMN` query doesn't change the type of decimal column if its size (32 bit or 64 bit) doesn't change. [#21728](https://github.com/ClickHouse/ClickHouse/pull/21728) ([alesapin](https://github.com/alesapin)). +* Backported in [#22282](https://github.com/ClickHouse/ClickHouse/issues/22282): Docker entrypoint: avoid chown of `.` in case when `LOG_PATH` is empty. Closes [#22100](https://github.com/ClickHouse/ClickHouse/issues/22100). [#22102](https://github.com/ClickHouse/ClickHouse/pull/22102) ([filimonov](https://github.com/filimonov)). +* Backported in [#22279](https://github.com/ClickHouse/ClickHouse/issues/22279): Fix waiting for `OPTIMIZE` and `ALTER` queries for `ReplicatedMergeTree` table engines. Now the query will not hang when the table was detached or restarted. [#22118](https://github.com/ClickHouse/ClickHouse/pull/22118) ([alesapin](https://github.com/alesapin)). +* Backported in [#22370](https://github.com/ClickHouse/ClickHouse/issues/22370): Now clickhouse will not throw `LOGICAL_ERROR` exception when we try to mutate the already covered part. Fixes [#22013](https://github.com/ClickHouse/ClickHouse/issues/22013). [#22291](https://github.com/ClickHouse/ClickHouse/pull/22291) ([alesapin](https://github.com/alesapin)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#17848](https://github.com/ClickHouse/ClickHouse/issues/17848) to 20.8: Fix max_distributed_connections"'. [#21940](https://github.com/ClickHouse/ClickHouse/pull/21940) ([Maksim Kita](https://github.com/kitaisreal)). 
+ diff --git a/docs/changelogs/v20.8.16.20-lts.md b/docs/changelogs/v20.8.16.20-lts.md new file mode 100644 index 00000000000..14c59f434d0 --- /dev/null +++ b/docs/changelogs/v20.8.16.20-lts.md @@ -0,0 +1,6 @@ +### ClickHouse release v20.8.16.20-lts FIXME as compared to v20.8.15.11-lts + +#### Bug Fix +* Backported in [#22091](https://github.com/ClickHouse/ClickHouse/issues/22091): In case if query has constant `WHERE` condition, and setting `optimize_skip_unused_shards` enabled, all shards may be skipped and query could return incorrect empty result. [#21550](https://github.com/ClickHouse/ClickHouse/pull/21550) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#22049](https://github.com/ClickHouse/ClickHouse/issues/22049): Fix deadlock in first catboost model execution. Closes [#13832](https://github.com/ClickHouse/ClickHouse/issues/13832). [#21844](https://github.com/ClickHouse/ClickHouse/pull/21844) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v20.8.17.25-lts.md b/docs/changelogs/v20.8.17.25-lts.md new file mode 100644 index 00000000000..124e5222293 --- /dev/null +++ b/docs/changelogs/v20.8.17.25-lts.md @@ -0,0 +1,10 @@ +### ClickHouse release v20.8.17.25-lts FIXME as compared to v20.8.16.20-lts + +#### Bug Fix +* Backported in [#21341](https://github.com/ClickHouse/ClickHouse/issues/21341): Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22698](https://github.com/ClickHouse/ClickHouse/issues/22698): Fix wait for mutations on several replicas for ReplicatedMergeTree table engines. Previously, mutation/alter query may finish before mutation actually executed on other replicas. [#22669](https://github.com/ClickHouse/ClickHouse/pull/22669) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#22737](https://github.com/ClickHouse/ClickHouse/issues/22737): Fix possible hangs in zk requests in case of OOM exception. Fixes [#22438](https://github.com/ClickHouse/ClickHouse/issues/22438). [#22684](https://github.com/ClickHouse/ClickHouse/pull/22684) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Build/Testing/Packaging Improvement +* Backported in [#22819](https://github.com/ClickHouse/ClickHouse/issues/22819): Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.8.18.32-lts.md b/docs/changelogs/v20.8.18.32-lts.md new file mode 100644 index 00000000000..92fba618f4a --- /dev/null +++ b/docs/changelogs/v20.8.18.32-lts.md @@ -0,0 +1,8 @@ +### ClickHouse release v20.8.18.32-lts FIXME as compared to v20.8.17.25-lts + +#### Bug Fix +* Backported in [#22288](https://github.com/ClickHouse/ClickHouse/issues/22288): Fix null dereference with `join_use_nulls=1`. [#20344](https://github.com/ClickHouse/ClickHouse/pull/20344) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#22556](https://github.com/ClickHouse/ClickHouse/issues/22556): Fix bug in partial merge join with `LowCardinality`. Close [#22386](https://github.com/ClickHouse/ClickHouse/issues/22386), close [#22388](https://github.com/ClickHouse/ClickHouse/issues/22388). [#22510](https://github.com/ClickHouse/ClickHouse/pull/22510) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#22891](https://github.com/ClickHouse/ClickHouse/issues/22891): Fix approx total rows accounting for reverse reading from MergeTree. [#22726](https://github.com/ClickHouse/ClickHouse/pull/22726) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#23172](https://github.com/ClickHouse/ClickHouse/issues/23172): Some values were formatted with alignment in center in table cells in `Markdown` format. Not anymore. 
[#23096](https://github.com/ClickHouse/ClickHouse/pull/23096) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.8.19.4-stable.md b/docs/changelogs/v20.8.19.4-stable.md new file mode 100644 index 00000000000..316d5f79d30 --- /dev/null +++ b/docs/changelogs/v20.8.19.4-stable.md @@ -0,0 +1,29 @@ +### ClickHouse release v20.8.19.4-stable FIXME as compared to v20.8.18.32-lts + +#### Improvement +* Backported in [#24720](https://github.com/ClickHouse/ClickHouse/issues/24720): If tuple of NULLs, e.g. `(NULL, NULL)` is on the left hand side of `IN` operator with tuples of non-NULLs on the right hand side, e.g. `SELECT (NULL, NULL) IN ((0, 0), (3, 1))` return 0 instead of throwing an exception about incompatible types. The expression may also appear due to optimization of something like `SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1)`. This closes [#22017](https://github.com/ClickHouse/ClickHouse/issues/22017). [#22063](https://github.com/ClickHouse/ClickHouse/pull/22063) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix +* Backported in [#24713](https://github.com/ClickHouse/ClickHouse/issues/24713): Fixed very rare race condition on background cleanup of old blocks. It might cause a block not to be deduplicated if it's too close to the end of deduplication window. [#23301](https://github.com/ClickHouse/ClickHouse/pull/23301) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24715](https://github.com/ClickHouse/ClickHouse/issues/24715): Fixed `Cannot unlink file` error on unsuccessful creation of ReplicatedMergeTree table with multidisk configuration. This closes [#21755](https://github.com/ClickHouse/ClickHouse/issues/21755). [#23433](https://github.com/ClickHouse/ClickHouse/pull/23433) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#24717](https://github.com/ClickHouse/ClickHouse/issues/24717): When modifying a column's default value without specifying a data type, and this column is used as a ReplacingMergeTree parameter (like column `b` in the example below), the server will core dump: ``` CREATE TABLE alter_test (a Int32, b DateTime) ENGINE = ReplacingMergeTree(b) ORDER BY a; ALTER TABLE alter_test MODIFY COLUMN `b` DEFAULT now(); ``` The server throws an error: ``` 2021.04.22 09:48:00.685317 [ 2607 ] {} BaseDaemon: Received signal 11 2021.04.22 09:48:00.686110 [ 2705 ] {} BaseDaemon: ######################################## 2021.04.22 09:48:00.686336 [ 2705 ] {} BaseDaemon: (version 21.6.1.1, build id: 6459E84DFCF8E778546C5AD2FFE91B3AD71E1B1B) (from thread 2619) (no query) Received signal Segmentation fault (11) 2021.04.22 09:48:00.686572 [ 2705 ] {} BaseDaemon: Address: NULL pointer. Access: read. Address not mapped to object. 2021.04.22 09:48:00.686686 [ 2705 ] {} BaseDaemon: Stack trace: 0x1c2585d7 0x1c254f66 0x1bb7e403 0x1bb58923 0x1bb56a85 0x1c6840ef 0x1c691148 0x2061a05c 0x2061a8e4 0x20775a03 0x207722bd 0x20771048 0x7f6e5c25be25 0x7f6e5bd81bad 2021.04.22 09:48:02.283045 [ 2705 ] {} BaseDaemon: 4. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1449: DB::(anonymous namespace)::checkVersionColumnTypesConversion(DB::IDataType const*, DB::IDataType const*, std::__1::basic_string, std::__1::allocator >) @ 0x1c2585d7 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:03.714451 [ 2705 ] {} BaseDaemon: 5. /mnt/disk4/hewenting/ClickHouse/src/src/Storages/MergeTree/MergeTreeData.cpp:1582: DB::MergeTreeData::checkAlterIsPossible(DB::AlterCommands const&, std::__1::shared_ptr) const @ 0x1c254f66 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server 2021.04.22 09:48:04.692949 [ 2705 ] {} BaseDaemon: 6.
/mnt/disk4/hewenting/ClickHouse/src/src/Interpreters/InterpreterAlterQuery.cpp:144: DB::InterpreterAlterQuery::execute() @ 0x1bb7e403 in /mnt/disk4/hewenting/ClickHouse/build-dbgsrc-clang-dev-nested/programs/clickhouse-server ```. [#23483](https://github.com/ClickHouse/ClickHouse/pull/23483) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#24744](https://github.com/ClickHouse/ClickHouse/issues/24744): Fix `columns` function when multiple joins in select query. Closes [#22736](https://github.com/ClickHouse/ClickHouse/issues/22736). [#23501](https://github.com/ClickHouse/ClickHouse/pull/23501) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#24711](https://github.com/ClickHouse/ClickHouse/issues/24711): Fix misinterpretation of some `LIKE` expressions with escape sequences. [#23610](https://github.com/ClickHouse/ClickHouse/pull/23610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#24709](https://github.com/ClickHouse/ClickHouse/issues/24709): Fixed a bug in recovery of staled `ReplicatedMergeTree` replica. Some metadata updates could be ignored by staled replica if `ALTER` query was executed during downtime of the replica. [#23742](https://github.com/ClickHouse/ClickHouse/pull/23742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24708](https://github.com/ClickHouse/ClickHouse/issues/24708): Fix SIGSEGV for external GROUP BY and overflow row (i.e. queries like `SELECT FROM GROUP BY WITH TOTALS SETTINGS max_bytes_before_external_group_by>0, max_rows_to_group_by>0, group_by_overflow_mode='any', totals_mode='before_having'`). [#23962](https://github.com/ClickHouse/ClickHouse/pull/23962) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#24707](https://github.com/ClickHouse/ClickHouse/issues/24707): Fix crash in MergeJoin, close [#24010](https://github.com/ClickHouse/ClickHouse/issues/24010). 
[#24013](https://github.com/ClickHouse/ClickHouse/pull/24013) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#24760](https://github.com/ClickHouse/ClickHouse/issues/24760): Fix race condition which could happen in RBAC under a heavy load. This PR fixes [#24090](https://github.com/ClickHouse/ClickHouse/issues/24090), [#24134](https://github.com/ClickHouse/ClickHouse/issues/24134). [#24176](https://github.com/ClickHouse/ClickHouse/pull/24176) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#24704](https://github.com/ClickHouse/ClickHouse/issues/24704): Fixed a bug in moving Materialized View from Ordinary to Atomic database (`RENAME TABLE` query). Now inner table is moved to new database together with Materialized View. Fixes [#23926](https://github.com/ClickHouse/ClickHouse/issues/23926). [#24309](https://github.com/ClickHouse/ClickHouse/pull/24309) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#24823](https://github.com/ClickHouse/ClickHouse/issues/24823): Fixed the deadlock that can happen during LDAP role (re)mapping, when LDAP group is mapped to a nonexistent local role. [#24431](https://github.com/ClickHouse/ClickHouse/pull/24431) ([Denis Glazachev](https://github.com/traceon)). +* Backported in [#25575](https://github.com/ClickHouse/ClickHouse/issues/25575): Fix incorrect monotonicity of toWeek function. This fixes [#24422](https://github.com/ClickHouse/ClickHouse/issues/24422). This bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/5212, and was exposed later by smarter partition pruner. [#24446](https://github.com/ClickHouse/ClickHouse/pull/24446) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#24701](https://github.com/ClickHouse/ClickHouse/issues/24701): Fixed the behavior when query `SYSTEM RESTART REPLICA` or `SYSTEM SYNC REPLICA` is being processed infinitely. This was detected on a server with an extremely small amount of RAM.
[#24457](https://github.com/ClickHouse/ClickHouse/pull/24457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#24852](https://github.com/ClickHouse/ClickHouse/issues/24852): Fix bug when exception `Mutation was killed` can be thrown to the client on mutation wait when mutation not loaded into memory yet. [#24809](https://github.com/ClickHouse/ClickHouse/pull/24809) ([alesapin](https://github.com/alesapin)). +* Backported in [#25184](https://github.com/ClickHouse/ClickHouse/issues/25184): Fixed bug with declaring S3 disk at root of bucket. Earlier, it reported an error: ``` [heather] 2021.05.10 02:11:11.932234 [ 72790 ] {2ff80b7b-ec53-41cb-ac35-19bb390e1759} executeQuery: Code: 36, e.displayText() = DB::Exception: Key name is empty in path style S3 URI: (http://172.17.0.2/bucket/) (version 21.6.1.1) (from 127.0.0.1:47994) (in query: SELECT policy_name FROM system.storage_policies), Stack trace (when copying this message, always include the lines below):. [#24898](https://github.com/ClickHouse/ClickHouse/pull/24898) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#24951](https://github.com/ClickHouse/ClickHouse/issues/24951): Fix possible heap-buffer-overflow in Arrow. [#24922](https://github.com/ClickHouse/ClickHouse/pull/24922) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25209](https://github.com/ClickHouse/ClickHouse/issues/25209): Fix crash in query with cross join and `joined_subquery_requires_alias = 0`. Fixes [#24011](https://github.com/ClickHouse/ClickHouse/issues/24011). [#25082](https://github.com/ClickHouse/ClickHouse/pull/25082) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#25350](https://github.com/ClickHouse/ClickHouse/issues/25350): Fix TOCTOU error in installation script. [#25277](https://github.com/ClickHouse/ClickHouse/pull/25277) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#25354](https://github.com/ClickHouse/ClickHouse/issues/25354): Fix Logical Error Cannot sum Array/Tuple in min/maxMap. [#25298](https://github.com/ClickHouse/ClickHouse/pull/25298) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#25503](https://github.com/ClickHouse/ClickHouse/issues/25503): Fix segfault when sharding_key is absent in task config for copier. [#25419](https://github.com/ClickHouse/ClickHouse/pull/25419) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#25713](https://github.com/ClickHouse/ClickHouse/issues/25713): Fixed `No such file or directory` error on moving `Distributed` table between databases. Fixes [#24971](https://github.com/ClickHouse/ClickHouse/issues/24971). [#25667](https://github.com/ClickHouse/ClickHouse/pull/25667) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#26145](https://github.com/ClickHouse/ClickHouse/issues/26145): Fix possible crash in `pointInPolygon` if the setting `validate_polygons` is turned off. [#26113](https://github.com/ClickHouse/ClickHouse/pull/26113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.8.2.3-stable.md b/docs/changelogs/v20.8.2.3-stable.md new file mode 100644 index 00000000000..9dda8b499c1 --- /dev/null +++ b/docs/changelogs/v20.8.2.3-stable.md @@ -0,0 +1,143 @@ +### ClickHouse release v20.8.2.3-stable FIXME as compared to v20.7.1.4310-prestable + +#### Backward Incompatible Change +* Remove support for the `ODBCDriver` input/output format. This was a deprecated format once used for communication with the ClickHouse ODBC driver, now long superseded by the `ODBCDriver2` format. Resolves [#13629](https://github.com/ClickHouse/ClickHouse/issues/13629). [#13847](https://github.com/ClickHouse/ClickHouse/pull/13847) ([hexiaoting](https://github.com/hexiaoting)). +* Extend `parallel_distributed_insert_select` setting, adding an option to run `INSERT` into local table. 
The setting changes type from `Bool` to `UInt64`, so the values `false` and `true` are no longer supported. If you have these values in server configuration, the server will not start. Please replace them with `0` and `1`, respectively. [#14060](https://github.com/ClickHouse/ClickHouse/pull/14060) ([Azat Khuzhin](https://github.com/azat)). +* Now `OPTIMIZE FINAL` query doesn't recalculate TTL for parts that were added before TTL was created. Use `ALTER TABLE ... MATERIALIZE TTL` once to calculate them, after that `OPTIMIZE FINAL` will evaluate TTL's properly. This behavior never worked for replicated tables. [#14220](https://github.com/ClickHouse/ClickHouse/pull/14220) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Support `MaterializeMySQL` database engine. Implements [#4006](https://github.com/ClickHouse/ClickHouse/issues/4006). [#10851](https://github.com/ClickHouse/ClickHouse/pull/10851) ([Winter Zhang](https://github.com/zhang2014)). +* Support Kerberos authentication in Kafka, using `krb5` and `cyrus-sasl` libraries. [#12771](https://github.com/ClickHouse/ClickHouse/pull/12771) ([Ilya Golshtein](https://github.com/ilejn)). +* Add types `Int128`, `Int256`, `UInt256` and related functions for them. Extend Decimals with Decimal256 (precision up to 76 digits). New types are under the setting `allow_experimental_bigint_types`. [#13097](https://github.com/ClickHouse/ClickHouse/pull/13097) ([Artem Zuikov](https://github.com/4ertus2)). +* Function `position` now supports optional `start_pos` argument. [#13237](https://github.com/ClickHouse/ClickHouse/pull/13237) ([Vladimir C](https://github.com/vdimir)). +* Add `ALTER SAMPLE BY` statement that allows to change table sample clause. [#13280](https://github.com/ClickHouse/ClickHouse/pull/13280) ([Amos Bird](https://github.com/amosbird)). +* Add new optional section to the main config. [#13425](https://github.com/ClickHouse/ClickHouse/pull/13425) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Add function `normalizeQuery` that replaces literals, sequences of literals and complex aliases with placeholders. Add function `normalizedQueryHash` that returns identical 64bit hash values for similar queries. It helps to analyze query log. This closes [#11271](https://github.com/ClickHouse/ClickHouse/issues/11271). [#13816](https://github.com/ClickHouse/ClickHouse/pull/13816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `quantileExactLow` and `quantileExactHigh` implementations with respective aliases for `medianExactLow` and `medianExactHigh`. [#13818](https://github.com/ClickHouse/ClickHouse/pull/13818) ([Bharat Nallan](https://github.com/bharatnc)). +* Add function `defaultValueOfTypeName` that returns the default value for a given type. [#13877](https://github.com/ClickHouse/ClickHouse/pull/13877) ([hcz](https://github.com/hczhcz)). +* Added `date_trunc` function that truncates a date/time value to a specified date/time part. [#13888](https://github.com/ClickHouse/ClickHouse/pull/13888) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)). +* Add the ability to specify `Default` compression codec for columns that correspond to settings specified in `config.xml`. Implements: [#9074](https://github.com/ClickHouse/ClickHouse/issues/9074). [#14049](https://github.com/ClickHouse/ClickHouse/pull/14049) ([alesapin](https://github.com/alesapin)). +* Add setting `min_index_granularity_bytes` that protects against accidentally creating a table with very low `index_granularity_bytes` setting. [#14139](https://github.com/ClickHouse/ClickHouse/pull/14139) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `countDigits(x)` function that count number of decimal digits in integer or decimal column. Add `isDecimalOverflow(d, [p])` function that checks if the value in Decimal column is out of its (or specified) precision. [#14151](https://github.com/ClickHouse/ClickHouse/pull/14151) ([Artem Zuikov](https://github.com/4ertus2)). 
+ +#### Performance Improvement +* When performing trivial `INSERT SELECT` queries, automatically set `max_threads` to 1 or `max_insert_threads`, and set `max_block_size` to `min_insert_block_size_rows`. Related to [#5907](https://github.com/ClickHouse/ClickHouse/issues/5907). [#12195](https://github.com/ClickHouse/ClickHouse/pull/12195) ([flynn](https://github.com/ucasfl)). +* Optimize `has()`, `indexOf()` and `countEqual()` functions for `Array(LowCardinality(T))` and constant right arguments. [#12550](https://github.com/ClickHouse/ClickHouse/pull/12550) ([Mike Kot](https://github.com/myrrc)). +* Slightly improve performance of aggregation by UInt8/UInt16 keys. [#13099](https://github.com/ClickHouse/ClickHouse/pull/13099) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail fast if `max_rows_to_read` limit is exceeded on parts scan. The motivation behind this change is to skip ranges scan for all selected parts if it is clear that `max_rows_to_read` is already exceeded. The change is quite noticeable for queries over big number of parts. [#13677](https://github.com/ClickHouse/ClickHouse/pull/13677) ([Roman Khavronenko](https://github.com/hagen1778)). +* Enable parallel INSERTs for table engines `Null`, `Memory`, `Distributed` and `Buffer`. [#14120](https://github.com/ClickHouse/ClickHouse/pull/14120) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* Added cache layer for DiskS3 (cache to local disk mark and index files). [#13076](https://github.com/ClickHouse/ClickHouse/pull/13076) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix assert when decimal has too large negative exponent. Fixes [#13188](https://github.com/ClickHouse/ClickHouse/issues/13188). 
[#13228](https://github.com/ClickHouse/ClickHouse/pull/13228) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQueryTimeMicroseconds to system.events. [#13336](https://github.com/ClickHouse/ClickHouse/pull/13336) ([ianton-ru](https://github.com/ianton-ru)). +* Proper remote host checking in S3 redirects (security-related thing). [#13404](https://github.com/ClickHouse/ClickHouse/pull/13404) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Enable mixed granularity parts by default. [#13449](https://github.com/ClickHouse/ClickHouse/pull/13449) ([alesapin](https://github.com/alesapin)). +* Move parts from DiskLocal to DiskS3 in parallel. [#13459](https://github.com/ClickHouse/ClickHouse/pull/13459) ([Pavel Kovalenko](https://github.com/Jokser)). +* Support compound identifiers for custom settings. [#13496](https://github.com/ClickHouse/ClickHouse/pull/13496) ([Vitaly Baranov](https://github.com/vitlibar)). +* Provide monotonicity for `toDate/toDateTime` functions in more cases. Now the input arguments are saturated more naturally and provide better monotonicity. [#13497](https://github.com/ClickHouse/ClickHouse/pull/13497) ([Amos Bird](https://github.com/amosbird)). +* In previous versions `lcm` function may produce assertion violation in debug build if called with specifically crafted arguments. This fixes [#13368](https://github.com/ClickHouse/ClickHouse/issues/13368). [#13510](https://github.com/ClickHouse/ClickHouse/pull/13510) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add MergeTree Write-Ahead-Log(WAL) dump tool. [#13640](https://github.com/ClickHouse/ClickHouse/pull/13640) ([BohuTANG](https://github.com/BohuTANG)). +* Backported in [#14360](https://github.com/ClickHouse/ClickHouse/issues/14360): Added Redis requirepass authorization. [#13688](https://github.com/ClickHouse/ClickHouse/pull/13688) ([Ivan Torgashov](https://github.com/it1804)).
+* Avoid too slow queries when arrays are manipulated as fields. Throw exception instead. [#13753](https://github.com/ClickHouse/ClickHouse/pull/13753) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Add [GTID-Based Replication](https://dev.mysql.com/doc/refman/5.7/en/replication-gtids-concepts.html), it works even when replication topology changes, and supported/preferred in MySQL 5.6/5.7/8.0 2. Add BIT/SET field type support 3. Fix up varchar type meta length bug. [#13820](https://github.com/ClickHouse/ClickHouse/pull/13820) ([BohuTANG](https://github.com/BohuTANG)). +* Fix data race in `lgamma` function. This race was caught only in `tsan`, no side effects really happened. [#13842](https://github.com/ClickHouse/ClickHouse/pull/13842) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Function `arrayCompact` will compare NaNs bitwise if the type of array elements is Float32/Float64. In previous versions NaNs were always not equal if the type of array elements is Float32/Float64 and were always equal if the type is more complex, like Nullable(Float64). This closes [#13857](https://github.com/ClickHouse/ClickHouse/issues/13857). [#13868](https://github.com/ClickHouse/ClickHouse/pull/13868) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better error message for null value of TabSeparatedRow format. [#13906](https://github.com/ClickHouse/ClickHouse/pull/13906) ([jiang tao](https://github.com/tomjiang1987)). +* Fix wrong error for long queries. It was possible to get syntax error other than `Max query size exceeded` for correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Corrected an error in AvroConfluent format that caused the Kafka table engine to stop processing messages when an abnormally small, malformed, message was received.
[#13941](https://github.com/ClickHouse/ClickHouse/pull/13941) ([Gervasio Varela](https://github.com/gervarela)). +* Increase limit in -Resample combinator to 1M. [#13947](https://github.com/ClickHouse/ClickHouse/pull/13947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Conditional aggregate functions (for example: `avgIf`, `sumIf`, `maxIf`) should return `NULL` when miss rows and use nullable arguments. [#13964](https://github.com/ClickHouse/ClickHouse/pull/13964) ([Winter Zhang](https://github.com/zhang2014)). +* Slightly better performance of Memory table if it was constructed from a huge number of very small blocks (that's unlikely). Author of the idea: [Mark Papadakis](https://github.com/markpapadakis). Closes [#14043](https://github.com/ClickHouse/ClickHouse/issues/14043). [#14056](https://github.com/ClickHouse/ClickHouse/pull/14056) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now it's possible to `ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'zk://:/path-in-zookeeper'`. It's useful for shipping data to new clusters. [#14155](https://github.com/ClickHouse/ClickHouse/pull/14155) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix +* subquery hash values are not enough to distinguish. [#8333](https://github.com/ClickHouse/ClickHouse/issues/8333). [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)). +* Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)). +* Fix access to redis dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)). +* Fix parsing row policies from users.xml when names of databases or tables contain dots. 
This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible race in `StorageMemory`. https://clickhouse-test-reports.s3.yandex.net/0/9cac8a7244063d2092ad25d45502611e18d3749c/stress_test_(thread)/stderr.log Have no idea how to write a test. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). [#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix premature `ON CLUSTER` timeouts for queries that must be executed on a single replica. Fixes [#6704](https://github.com/ClickHouse/ClickHouse/issues/6704), Fixes [#7228](https://github.com/ClickHouse/ClickHouse/issues/7228), Fixes [#13361](https://github.com/ClickHouse/ClickHouse/issues/13361), Fixes [#11884](https://github.com/ClickHouse/ClickHouse/issues/11884). [#13450](https://github.com/ClickHouse/ClickHouse/pull/13450) ([alesapin](https://github.com/alesapin)). +* Fix secondary indices corruption in compact parts. [#13538](https://github.com/ClickHouse/ClickHouse/pull/13538) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the behaviour when sometimes cache-dictionary returned default value instead of present value from source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Concurrent `ALTER ... 
REPLACE/MOVE PARTITION ...` queries might cause deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix typo in error message about `The value of 'number_of_free_entries_in_pool_to_lower_max_size_of_merge' setting`. [#13678](https://github.com/ClickHouse/ClickHouse/pull/13678) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Do not optimize any(arrayJoin()) -> arrayJoin() under optimize_move_functions_out_of_any. [#13681](https://github.com/ClickHouse/ClickHouse/pull/13681) ([Azat Khuzhin](https://github.com/azat)). +* Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect message in `clickhouse-server.init` while checking user and group. [#13711](https://github.com/ClickHouse/ClickHouse/pull/13711) ([ylchou](https://github.com/ylchou)). +* Fixes /replicas_status endpoint response status code when verbose=1. [#13722](https://github.com/ClickHouse/ClickHouse/pull/13722) ([javi santana](https://github.com/javisantana)). +* Fix logging Settings.Names/Values when log_queries_min_type > QUERY_START. [#13737](https://github.com/ClickHouse/ClickHouse/pull/13737) ([Azat Khuzhin](https://github.com/azat)). +* Fix race condition between DETACH and background merges. Parts may revive after detach. 
This is continuation of [#8602](https://github.com/ClickHouse/ClickHouse/issues/8602) that did not fix the issue but introduced a test that started to fail in very rare cases, demonstrating the issue. [#13746](https://github.com/ClickHouse/ClickHouse/pull/13746) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add range check for h3KRing function. This fixes [#13633](https://github.com/ClickHouse/ClickHouse/issues/13633). [#13752](https://github.com/ClickHouse/ClickHouse/pull/13752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed `Directory not empty` error when concurrently executing `DROP DATABASE` and `CREATE TABLE`. [#13756](https://github.com/ClickHouse/ClickHouse/pull/13756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix step overflow in range(). [#13790](https://github.com/ClickHouse/ClickHouse/pull/13790) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14387](https://github.com/ClickHouse/ClickHouse/issues/14387): Fix arrayJoin() capturing in lambda (LOGICAL_ERROR). [#13792](https://github.com/ClickHouse/ClickHouse/pull/13792) ([Azat Khuzhin](https://github.com/azat)). +* Fix reading from MergeTree table with INDEX of type SET fails when comparing against NULL. This fixes [#13686](https://github.com/ClickHouse/ClickHouse/issues/13686). [#13793](https://github.com/ClickHouse/ClickHouse/pull/13793) ([Amos Bird](https://github.com/amosbird)). +* Fix topK/topKWeighted merge (with non-default parameters). [#13817](https://github.com/ClickHouse/ClickHouse/pull/13817) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect sorting for `FixedString` columns. Fixes [#13182](https://github.com/ClickHouse/ClickHouse/issues/13182). [#13887](https://github.com/ClickHouse/ClickHouse/pull/13887) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed potential deadlock when renaming `Distributed` table. 
[#13922](https://github.com/ClickHouse/ClickHouse/pull/13922) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix wrong results in select queries with `DISTINCT` keyword in case `optimize_duplicate_order_by_and_distinct` setting is enabled. [#13925](https://github.com/ClickHouse/ClickHouse/pull/13925) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix parser to reject create table as table function with engine. [#13940](https://github.com/ClickHouse/ClickHouse/pull/13940) ([hcz](https://github.com/hczhcz)). +* Backported in [#14389](https://github.com/ClickHouse/ClickHouse/issues/14389): Fix GRANT ALL statement when executed on a non-global level. [#13987](https://github.com/ClickHouse/ClickHouse/pull/13987) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix pointInPolygon with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)). +* Backported in [#14311](https://github.com/ClickHouse/ClickHouse/issues/14311): Fix DistributedFilesToInsert metric (zeroed when it should not). [#14095](https://github.com/ClickHouse/ClickHouse/pull/14095) ([Azat Khuzhin](https://github.com/azat)). +* When waiting for a dictionary update to complete, use the timeout specified by `query_wait_timeout_milliseconds` setting instead of a hard-coded value. [#14105](https://github.com/ClickHouse/ClickHouse/pull/14105) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix formatting of minimal negative decimal numbers. This fixes [#14111](https://github.com/ClickHouse/ClickHouse/issues/14111). [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix creation of tables with named tuples. 
This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). [#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14312](https://github.com/ClickHouse/ClickHouse/issues/14312): Fix crash when INSERT INTO Kafka engine table from an empty result set with a subquery. ... [#14203](https://github.com/ClickHouse/ClickHouse/pull/14203) ([Dongdong Yang](https://github.com/donge)). +* Fixed incorrect sorting order of LowCardinality column. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277 . [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#14310](https://github.com/ClickHouse/ClickHouse/issues/14310): fixes [#14231](https://github.com/ClickHouse/ClickHouse/issues/14231) fix wrong lexer in MaterializeMySQL database engine dump stage. [#14232](https://github.com/ClickHouse/ClickHouse/pull/14232) ([Winter Zhang](https://github.com/zhang2014)). +* Fix handling of empty transactions in `MaterializeMySQL` database engine. This fixes [#14235](https://github.com/ClickHouse/ClickHouse/issues/14235). [#14253](https://github.com/ClickHouse/ClickHouse/pull/14253) ([BohuTANG](https://github.com/BohuTANG)). +* Backported in [#14332](https://github.com/ClickHouse/ClickHouse/issues/14332): Disallows `CODEC` on `ALIAS` column type. Fixes [#13911](https://github.com/ClickHouse/ClickHouse/issues/13911). [#14263](https://github.com/ClickHouse/ClickHouse/pull/14263) ([Bharat Nallan](https://github.com/bharatnc)).
+* Backported in [#14308](https://github.com/ClickHouse/ClickHouse/issues/14308): Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861). [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#14410](https://github.com/ClickHouse/ClickHouse/issues/14410): Fix QueryPlan lifetime (for EXPLAIN PIPELINE graph=1) for queries with nested interpreter. [#14315](https://github.com/ClickHouse/ClickHouse/pull/14315) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14396](https://github.com/ClickHouse/ClickHouse/issues/14396): Fix exception during ALTER LIVE VIEW query with REFRESH command. [#14320](https://github.com/ClickHouse/ClickHouse/pull/14320) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#14343](https://github.com/ClickHouse/ClickHouse/issues/14343): Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)). +* Backported in [#14363](https://github.com/ClickHouse/ClickHouse/issues/14363): Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#14506](https://github.com/ClickHouse/ClickHouse/issues/14506): Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#14487](https://github.com/ClickHouse/ClickHouse/issues/14487): Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)). +* Backported in [#14481](https://github.com/ClickHouse/ClickHouse/issues/14481): Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Move Dockerfiles from integration tests to `docker/test` directory. docker_compose files are available in `runner` docker container. Docker images are built in CI and not in integration tests. [#13448](https://github.com/ClickHouse/ClickHouse/pull/13448) ([Ilya Yatsishin](https://github.com/qoega)). +* Skip PRs from robot-clickhouse. [#13489](https://github.com/ClickHouse/ClickHouse/pull/13489) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix typos in code with codespell. [#13511](https://github.com/ClickHouse/ClickHouse/pull/13511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable check for typos in code with `codespell`. [#13513](https://github.com/ClickHouse/ClickHouse/pull/13513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ensure that there is no copy-pasted GPL code. [#13514](https://github.com/ClickHouse/ClickHouse/pull/13514) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to run `clickhouse` binary without configuration. [#13515](https://github.com/ClickHouse/ClickHouse/pull/13515) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Added `clickhouse install` script, that is useful if you only have a single binary. [#13528](https://github.com/ClickHouse/ClickHouse/pull/13528) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the remaining shellcheck notices. A preparation to enable Shellcheck. [#13529](https://github.com/ClickHouse/ClickHouse/pull/13529) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable Shellcheck in CI as a linter of .sh tests. This closes [#13168](https://github.com/ClickHouse/ClickHouse/issues/13168). [#13530](https://github.com/ClickHouse/ClickHouse/pull/13530) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make sure [#10977](https://github.com/ClickHouse/ClickHouse/issues/10977) is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)). +* Increasing health-check timeouts for ClickHouse nodes and adding support to dump docker-compose logs if unhealthy containers found. [#13612](https://github.com/ClickHouse/ClickHouse/pull/13612) ([vzakaznikov](https://github.com/vzakaznikov)). +* Build ClickHouse with the most fresh tzdata from package repository. [#13623](https://github.com/ClickHouse/ClickHouse/pull/13623) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Removed `-DENABLE_CURL_CLIENT` for `contrib/aws`. [#13628](https://github.com/ClickHouse/ClickHouse/pull/13628) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Expose version of embedded tzdata via TZDATA_VERSION in system.build_options. [#13648](https://github.com/ClickHouse/ClickHouse/pull/13648) ([filimonov](https://github.com/filimonov)). +* Updating LDAP user authentication suite to check that it works with RBAC. [#13656](https://github.com/ClickHouse/ClickHouse/pull/13656) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add a CMake option to fail configuration instead of auto-reconfiguration, enabled by default. 
[#13687](https://github.com/ClickHouse/ClickHouse/pull/13687) ([Konstantin](https://github.com/podshumok)). +* Fix link error in shared build. [#13700](https://github.com/ClickHouse/ClickHouse/pull/13700) ([Amos Bird](https://github.com/amosbird)). +* Fix Cassandra build on Mac OS. [#13708](https://github.com/ClickHouse/ClickHouse/pull/13708) ([Ilya Yatsishin](https://github.com/qoega)). +* Added docker image for style check. Added style check that all docker and docker compose files are located in docker directory. [#13724](https://github.com/ClickHouse/ClickHouse/pull/13724) ([Ilya Yatsishin](https://github.com/qoega)). +* ZooKeeper cannot work reliably in unit tests in CI infrastructure. Using unit tests for ZooKeeper interaction with real ZooKeeper is a bad idea from the start (unit tests are not supposed to verify complex distributed systems). We are already using integration tests for this purpose and they are better suited. [#13745](https://github.com/ClickHouse/ClickHouse/pull/13745) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Testflows LDAP module: adding missing certificates and dhparam.pem for openldap4. [#13780](https://github.com/ClickHouse/ClickHouse/pull/13780) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enabled text-log in stress test to find more bugs. [#13855](https://github.com/ClickHouse/ClickHouse/pull/13855) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* * Adding retry logic when bringing up docker-compose cluster * Increasing COMPOSE_HTTP_TIMEOUT. [#14112](https://github.com/ClickHouse/ClickHouse/pull/14112) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add the ability to write js-style comments in skip_list.json. [#14159](https://github.com/ClickHouse/ClickHouse/pull/14159) ([alesapin](https://github.com/alesapin)). +* Switch tests docker images to use test-base parent. [#14167](https://github.com/ClickHouse/ClickHouse/pull/14167) ([Ilya Yatsishin](https://github.com/qoega)).
+* Actually there are no symlinks there, so `-type f` is enough ``` ~/workspace/ClickHouse/contrib/cctz/testdata/zoneinfo$ find . -type l -ls | wc -l 0 ``` Closes [#14209](https://github.com/ClickHouse/ClickHouse/issues/14209). [#14215](https://github.com/ClickHouse/ClickHouse/pull/14215) ([filimonov](https://github.com/filimonov)). + +#### Other +* Fix readline so it dumps history to file now. [#13600](https://github.com/ClickHouse/ClickHouse/pull/13600) ([Amos Bird](https://github.com/amosbird)). +* Create `system` database with `Atomic` engine by default. [#13680](https://github.com/ClickHouse/ClickHouse/pull/13680) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Bump pymdown-extensions from 7.1 to 8.0 in /docs/tools'. [#13645](https://github.com/ClickHouse/ClickHouse/pull/13645) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). +* NO CL ENTRY: 'Bump protobuf from 3.12.4 to 3.13.0 in /docs/tools'. [#13824](https://github.com/ClickHouse/ClickHouse/pull/13824) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). + diff --git a/docs/changelogs/v20.8.3.18-stable.md b/docs/changelogs/v20.8.3.18-stable.md new file mode 100644 index 00000000000..92b86dd99e7 --- /dev/null +++ b/docs/changelogs/v20.8.3.18-stable.md @@ -0,0 +1,16 @@ +### ClickHouse release v20.8.3.18-stable FIXME as compared to v20.8.2.3-stable + +#### Improvement +* Backported in [#14874](https://github.com/ClickHouse/ClickHouse/issues/14874): Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). +* Speed up server shutdown process if there are ongoing S3 requests. [#14858](https://github.com/ClickHouse/ClickHouse/pull/14858) ([Pavel Kovalenko](https://github.com/Jokser)). 
+ +#### Bug Fix +* Backported in [#14600](https://github.com/ClickHouse/ClickHouse/issues/14600): Fix rare segfaults in functions with combinator -Resample, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#14727](https://github.com/ClickHouse/ClickHouse/issues/14727): Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#14653](https://github.com/ClickHouse/ClickHouse/issues/14653): Added the checker as neither calling `lc->isNullable()` nor calling `ls->getDictionaryPtr()->isNullable()` would return the correct result. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#14666](https://github.com/ClickHouse/ClickHouse/issues/14666): Fix wrong Decimal multiplication result caused wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#14793](https://github.com/ClickHouse/ClickHouse/issues/14793): Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Backported in [#14722](https://github.com/ClickHouse/ClickHouse/issues/14722): Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. 
[#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14910](https://github.com/ClickHouse/ClickHouse/issues/14910): Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14943](https://github.com/ClickHouse/ClickHouse/issues/14943): Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.8.4.11-lts.md b/docs/changelogs/v20.8.4.11-lts.md new file mode 100644 index 00000000000..ea8b71e1992 --- /dev/null +++ b/docs/changelogs/v20.8.4.11-lts.md @@ -0,0 +1,38 @@ +### ClickHouse release v20.8.4.11-lts FIXME as compared to v20.8.3.18-stable + +#### Improvement +* Backported in [#15569](https://github.com/ClickHouse/ClickHouse/issues/15569): Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix +* Backported in [#15017](https://github.com/ClickHouse/ClickHouse/issues/15017): Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14823](https://github.com/ClickHouse/ClickHouse/issues/14823): Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). 
[#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15148](https://github.com/ClickHouse/ClickHouse/issues/15148): Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#15251](https://github.com/ClickHouse/ClickHouse/issues/15251): Fixed segfault in CacheDictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14955](https://github.com/ClickHouse/ClickHouse/issues/14955): Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#14987](https://github.com/ClickHouse/ClickHouse/issues/14987): Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#14969](https://github.com/ClickHouse/ClickHouse/issues/14969): Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). 
+* Backported in [#15053](https://github.com/ClickHouse/ClickHouse/issues/15053): Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)). +* Backported in [#15077](https://github.com/ClickHouse/ClickHouse/issues/15077): Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit is exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15102](https://github.com/ClickHouse/ClickHouse/issues/15102): Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15056](https://github.com/ClickHouse/ClickHouse/issues/15056): If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15049](https://github.com/ClickHouse/ClickHouse/issues/15049): We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
+* Backported in [#15142](https://github.com/ClickHouse/ClickHouse/issues/15142): Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15208](https://github.com/ClickHouse/ClickHouse/issues/15208): Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)). +* Backported in [#15222](https://github.com/ClickHouse/ClickHouse/issues/15222): Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Backported in [#15405](https://github.com/ClickHouse/ClickHouse/issues/15405): Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15489](https://github.com/ClickHouse/ClickHouse/issues/15489): Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15281](https://github.com/ClickHouse/ClickHouse/issues/15281): Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#15402](https://github.com/ClickHouse/ClickHouse/issues/15402): Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hang forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15338](https://github.com/ClickHouse/ClickHouse/issues/15338): Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Backported in [#15334](https://github.com/ClickHouse/ClickHouse/issues/15334): Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Backported in [#15447](https://github.com/ClickHouse/ClickHouse/issues/15447): Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Backported in [#15505](https://github.com/ClickHouse/ClickHouse/issues/15505): Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Backported in [#15550](https://github.com/ClickHouse/ClickHouse/issues/15550): Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#15558](https://github.com/ClickHouse/ClickHouse/issues/15558): Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). +* Backported in [#15726](https://github.com/ClickHouse/ClickHouse/issues/15726): Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15652](https://github.com/ClickHouse/ClickHouse/issues/15652): Fix 'Database doesn't exist.' in queries with IN and Distributed table when there's no database on initiator. [#15538](https://github.com/ClickHouse/ClickHouse/pull/15538) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15629](https://github.com/ClickHouse/ClickHouse/issues/15629): Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#15585](https://github.com/ClickHouse/ClickHouse/issues/15585): Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15666](https://github.com/ClickHouse/ClickHouse/issues/15666): Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#15715](https://github.com/ClickHouse/ClickHouse/issues/15715): Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v20.8.5.45-lts.md b/docs/changelogs/v20.8.5.45-lts.md new file mode 100644 index 00000000000..c4043625f0d --- /dev/null +++ b/docs/changelogs/v20.8.5.45-lts.md @@ -0,0 +1,37 @@ +### ClickHouse release v20.8.5.45-lts FIXME as compared to v20.8.4.11-lts + +#### Improvement +* Backported in [#16146](https://github.com/ClickHouse/ClickHouse/issues/16146): Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). +* Backported in [#16310](https://github.com/ClickHouse/ClickHouse/issues/16310): Add allow_nondeterministic_optimize_skip_unused_shards (to allow non-deterministic functions like rand() or dictGet() in sharding key). [#16105](https://github.com/ClickHouse/ClickHouse/pull/16105) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Backported in [#15620](https://github.com/ClickHouse/ClickHouse/issues/15620): Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#16203](https://github.com/ClickHouse/ClickHouse/issues/16203): Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
+* Backported in [#16230](https://github.com/ClickHouse/ClickHouse/issues/16230): Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#15871](https://github.com/ClickHouse/ClickHouse/issues/15871): Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#15772](https://github.com/ClickHouse/ClickHouse/issues/15772): Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15738](https://github.com/ClickHouse/ClickHouse/issues/15738): Fix error `Cannot find column` which may happen at insertion into `MATERIALIZED VIEW` in case the query for `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#15795](https://github.com/ClickHouse/ClickHouse/issues/15795): Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298).
[#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15900](https://github.com/ClickHouse/ClickHouse/issues/15900): Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Backported in [#15925](https://github.com/ClickHouse/ClickHouse/issues/15925): Now exception will be thrown when `ALTER MODIFY COLUMN ... DEFAULT ...` has incompatible default with column type. Fixes [#15854](https://github.com/ClickHouse/ClickHouse/issues/15854). [#15858](https://github.com/ClickHouse/ClickHouse/pull/15858) ([alesapin](https://github.com/alesapin)). +* Backported in [#15920](https://github.com/ClickHouse/ClickHouse/issues/15920): Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#16168](https://github.com/ClickHouse/ClickHouse/issues/16168): Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#16355](https://github.com/ClickHouse/ClickHouse/issues/16355): Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. 
[#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15968](https://github.com/ClickHouse/ClickHouse/issues/15968): Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16025](https://github.com/ClickHouse/ClickHouse/issues/16025): Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#16219](https://github.com/ClickHouse/ClickHouse/issues/16219): Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#16029](https://github.com/ClickHouse/ClickHouse/issues/16029): Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Backported in [#16088](https://github.com/ClickHouse/ClickHouse/issues/16088): Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16360](https://github.com/ClickHouse/ClickHouse/issues/16360): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). 
+* Backported in [#16141](https://github.com/ClickHouse/ClickHouse/issues/16141): Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Backported in [#16078](https://github.com/ClickHouse/ClickHouse/issues/16078): Fixes [#15780](https://github.com/ClickHouse/ClickHouse/issues/15780) regression, e.g. indexOf([1, 2, 3], toLowCardinality(1)) now is prohibited but it should not be. [#16038](https://github.com/ClickHouse/ClickHouse/pull/16038) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#16294](https://github.com/ClickHouse/ClickHouse/issues/16294): Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16233](https://github.com/ClickHouse/ClickHouse/issues/16233): Fix the case when memory can be overallocated regardless to the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16325](https://github.com/ClickHouse/ClickHouse/issues/16325): Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16375](https://github.com/ClickHouse/ClickHouse/issues/16375): Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#16431](https://github.com/ClickHouse/ClickHouse/issues/16431): Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16450](https://github.com/ClickHouse/ClickHouse/issues/16450): Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Other +* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16159](https://github.com/ClickHouse/ClickHouse/pull/16159) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v20.8.6.6-lts.md b/docs/changelogs/v20.8.6.6-lts.md new file mode 100644 index 00000000000..d63266df523 --- /dev/null +++ b/docs/changelogs/v20.8.6.6-lts.md @@ -0,0 +1,12 @@ +### ClickHouse release v20.8.6.6-lts FIXME as compared to v20.8.5.45-lts + +#### Bug Fix +* Backported in [#16494](https://github.com/ClickHouse/ClickHouse/issues/16494): Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and not touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#16816](https://github.com/ClickHouse/ClickHouse/issues/16816): Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created.
[#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#16506](https://github.com/ClickHouse/ClickHouse/issues/16506): Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16472](https://github.com/ClickHouse/ClickHouse/issues/16472): Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16573](https://github.com/ClickHouse/ClickHouse/issues/16573): Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Backported in [#16552](https://github.com/ClickHouse/ClickHouse/issues/16552): Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#16823](https://github.com/ClickHouse/ClickHouse/issues/16823): Fixed [#16081](https://github.com/ClickHouse/ClickHouse/issues/16081). [#16613](https://github.com/ClickHouse/ClickHouse/pull/16613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Backported in [#16892](https://github.com/ClickHouse/ClickHouse/issues/16892): Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.8.7.15-lts.md b/docs/changelogs/v20.8.7.15-lts.md new file mode 100644 index 00000000000..826e4f7e070 --- /dev/null +++ b/docs/changelogs/v20.8.7.15-lts.md @@ -0,0 +1,19 @@ +### ClickHouse release v20.8.7.15-lts FIXME as compared to v20.8.6.6-lts + +#### Improvement +* Backported in [#17029](https://github.com/ClickHouse/ClickHouse/issues/17029): Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix +* Backported in [#15676](https://github.com/ClickHouse/ClickHouse/issues/15676): Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17106](https://github.com/ClickHouse/ClickHouse/issues/17106): fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). 
+* Backported in [#16761](https://github.com/ClickHouse/ClickHouse/issues/16761): This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16738](https://github.com/ClickHouse/ClickHouse/issues/16738): Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17021](https://github.com/ClickHouse/ClickHouse/issues/17021): Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16880](https://github.com/ClickHouse/ClickHouse/issues/16880): Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#16950](https://github.com/ClickHouse/ClickHouse/issues/16950): Prevent clickhouse server crashes when using TimeSeriesGroupSum. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)). +* Backported in [#17077](https://github.com/ClickHouse/ClickHouse/issues/17077): Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17007](https://github.com/ClickHouse/ClickHouse/issues/17007): Install script should always create subdirs in config folders. 
This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Backported in [#17011](https://github.com/ClickHouse/ClickHouse/issues/17011): Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` has `WHERE` expression on altering column and alter hasn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17040](https://github.com/ClickHouse/ClickHouse/issues/17040): Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Backported in [#17171](https://github.com/ClickHouse/ClickHouse/issues/17171): Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.8.8.2-lts.md b/docs/changelogs/v20.8.8.2-lts.md new file mode 100644 index 00000000000..73e566efac2 --- /dev/null +++ b/docs/changelogs/v20.8.8.2-lts.md @@ -0,0 +1,10 @@ +### ClickHouse release v20.8.8.2-lts FIXME as compared to v20.8.7.15-lts + +#### Bug Fix +* Backported in [#17198](https://github.com/ClickHouse/ClickHouse/issues/17198): Avoid unnecessary network errors for remote queries which may be cancelled during execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17131](https://github.com/ClickHouse/ClickHouse/issues/17131): Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()`. Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944).
[#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17192](https://github.com/ClickHouse/ClickHouse/issues/17192): Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#17287](https://github.com/ClickHouse/ClickHouse/issues/17287): Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). + diff --git a/docs/changelogs/v20.8.9.6-lts.md b/docs/changelogs/v20.8.9.6-lts.md new file mode 100644 index 00000000000..943bd9e57e7 --- /dev/null +++ b/docs/changelogs/v20.8.9.6-lts.md @@ -0,0 +1,2 @@ +### ClickHouse release v20.8.9.6-lts FIXME as compared to v20.8.8.2-lts + diff --git a/docs/changelogs/v20.9.1.4585-prestable.md b/docs/changelogs/v20.9.1.4585-prestable.md new file mode 100644 index 00000000000..db3b8a409a4 --- /dev/null +++ b/docs/changelogs/v20.9.1.4585-prestable.md @@ -0,0 +1,63 @@ +### ClickHouse release v20.9.1.4585-prestable FIXME as compared to v20.8.1.4513-prestable + +#### Backward Incompatible Change +* Added MergeTree settings (`max_replicated_merges_with_ttl_in_queue` and `max_number_of_merges_with_ttl_in_pool`) to control the number of merges with TTL in the background pool and replicated queue. This change breaks compatibility with older versions only if you use delete TTL. Otherwise, replication will stay compatible. You can avoid incompatibility issues if you update all shard replicas at once or execute `SYSTEM STOP TTL MERGES` until you finish the update of all replicas. 
If you get an incompatible entry in the replication queue, first of all, execute `SYSTEM STOP TTL MERGES` and then `ALTER TABLE ... DETACH PARTITION ...` for the partition where incompatible TTL merge was assigned. Attach it back on a single replica. [#14490](https://github.com/ClickHouse/ClickHouse/pull/14490) ([alesapin](https://github.com/alesapin)). + +#### New Feature +* Add table function `view` which turns a subquery into a table object. This helps passing queries around. For instance, it can be used in remote/cluster table functions. [#12567](https://github.com/ClickHouse/ClickHouse/pull/12567) ([Amos Bird](https://github.com/amosbird)). +* Now we can write `select * apply(length) apply(max) from wide_string_table` to find out the maximum length of all string columns. And the following two variants are provided too:. [#14233](https://github.com/ClickHouse/ClickHouse/pull/14233) ([Amos Bird](https://github.com/amosbird)). +* Add `query_start_time_microseconds` field to `system.query_log` & `system.query_thread_log` tables. [#14252](https://github.com/ClickHouse/ClickHouse/pull/14252) ([Bharat Nallan](https://github.com/bharatnc)). +* Added an aggregate function RankCorrelationSpearman which simply computes a rank correlation coefficient. Continuation of [#11769](https://github.com/ClickHouse/ClickHouse/issues/11769). [#14411](https://github.com/ClickHouse/ClickHouse/pull/14411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added database generation by query util. Continuation of [#10973](https://github.com/ClickHouse/ClickHouse/issues/10973). [#14442](https://github.com/ClickHouse/ClickHouse/pull/14442) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Performance Improvement +* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under optimize_skip_unused_shards and optimize_distributed_group_by_sharding_key).
[#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)). + +#### Improvement +* Improvements in StorageRabbitMQ: Added connection and channels failure handling, proper commits, insert failures handling, better exchanges, queue durability and queue resume opportunity, new queue settings. Fixed tests. [#12761](https://github.com/ClickHouse/ClickHouse/pull/12761) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added Redis requirepass authorization. [#13688](https://github.com/ClickHouse/ClickHouse/pull/13688) ([Ivan Torgashov](https://github.com/it1804)). +* Add precision argument for DateTime type. [#13761](https://github.com/ClickHouse/ClickHouse/pull/13761) ([Winter Zhang](https://github.com/zhang2014)). +* Improve the Kafka engine performance by providing independent thread for each consumer. Separate thread pool for streaming engines (like Kafka). [#13939](https://github.com/ClickHouse/ClickHouse/pull/13939) ([fastio](https://github.com/fastio)). +* Add default compression codec for parts in `system.part_log` with name `default_compression_codec`. [#14116](https://github.com/ClickHouse/ClickHouse/pull/14116) ([alesapin](https://github.com/alesapin)). +* Replace wide integers from boost multiprecision with implementation from https://github.com/cerevra/int. [#14229](https://github.com/ClickHouse/ClickHouse/pull/14229) ([Artem Zuikov](https://github.com/4ertus2)). +* Implicitly convert primary key to not null in MaterializeMySQL(Same as MySQL). Fixes [#14114](https://github.com/ClickHouse/ClickHouse/issues/14114). [#14397](https://github.com/ClickHouse/ClickHouse/pull/14397) ([Winter Zhang](https://github.com/zhang2014)). +* Added new setting system_events_show_zero_values as proposed in [#11384](https://github.com/ClickHouse/ClickHouse/issues/11384). [#14404](https://github.com/ClickHouse/ClickHouse/pull/14404) ([Dmitry Rubashkin](https://github.com/dimarub2000)). 
+* Now obfuscator supports UUID type as proposed in [#13163](https://github.com/ClickHouse/ClickHouse/issues/13163). [#14409](https://github.com/ClickHouse/ClickHouse/pull/14409) ([Dmitry Rubashkin](https://github.com/dimarub2000)). +* Creating sets for multiple `JOIN` and `IN` in parallel. It may slightly improve performance for queries with several different `IN subquery` expressions. [#14412](https://github.com/ClickHouse/ClickHouse/pull/14412) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now TTLs will be applied during merge if they were not previously materialized. [#14438](https://github.com/ClickHouse/ClickHouse/pull/14438) ([alesapin](https://github.com/alesapin)). +* MySQL handler returns `OK` for queries like `SET @@var = value`. Such statement is ignored. It is needed because some MySQL drivers send `SET @@` query for setup after handshake [#9336](https://github.com/ClickHouse/ClickHouse/issues/9336)#issuecomment-686222422 . [#14469](https://github.com/ClickHouse/ClickHouse/pull/14469) ([BohuTANG](https://github.com/BohuTANG)). +* Speed up server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)). +* Disallow empty time_zone argument in `toStartOf*` type of functions. [#14509](https://github.com/ClickHouse/ClickHouse/pull/14509) ([Bharat Nallan](https://github.com/bharatnc)). +* ... [#14523](https://github.com/ClickHouse/ClickHouse/pull/14523) ([BohuTANG](https://github.com/BohuTANG)). +* Use std::filesystem::path in ConfigProcessor for concatenating file paths. [#14558](https://github.com/ClickHouse/ClickHouse/pull/14558) ([Bharat Nallan](https://github.com/bharatnc)). + +#### Bug Fix +* Fix arrayJoin() capturing in lambda (LOGICAL_ERROR). [#13792](https://github.com/ClickHouse/ClickHouse/pull/13792) ([Azat Khuzhin](https://github.com/azat)). +* Fix GRANT ALL statement when executed on a non-global level. 
[#13987](https://github.com/ClickHouse/ClickHouse/pull/13987) ([Vitaly Baranov](https://github.com/vitlibar)). +* Disallows `CODEC` on `ALIAS` column type. Fixes [#13911](https://github.com/ClickHouse/ClickHouse/issues/13911). [#14263](https://github.com/ClickHouse/ClickHouse/pull/14263) ([Bharat Nallan](https://github.com/bharatnc)). +* Better check for tuple size in SSD cache complex key external dictionaries. This fixes [#13981](https://github.com/ClickHouse/ClickHouse/issues/13981). [#14313](https://github.com/ClickHouse/ClickHouse/pull/14313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix QueryPlan lifetime (for EXPLAIN PIPELINE graph=1) for queries with nested interpreter. [#14315](https://github.com/ClickHouse/ClickHouse/pull/14315) ([Azat Khuzhin](https://github.com/azat)). +* Fix exception during ALTER LIVE VIEW query with REFRESH command. [#14320](https://github.com/ClickHouse/ClickHouse/pull/14320) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)). +* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug which leads to wrong merges assignment if table has partitions with a single part. 
[#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)). +* Proxy restart/start/stop/reload of SysVinit to systemd (if it is used). [#14460](https://github.com/ClickHouse/ClickHouse/pull/14460) ([Azat Khuzhin](https://github.com/azat)). +* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Integration tests use default base config. All config changes are explicit with main_configs, user_configs and dictionaries parameters for instance. [#13647](https://github.com/ClickHouse/ClickHouse/pull/13647) ([Ilya Yatsishin](https://github.com/qoega)). +* ... [#14368](https://github.com/ClickHouse/ClickHouse/pull/14368) ([BohuTANG](https://github.com/BohuTANG)). +* Fix the logic in backport script. In previous versions it was triggered for any labels of 100% red color. It was strange. [#14433](https://github.com/ClickHouse/ClickHouse/pull/14433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix missed `#include `. [#14440](https://github.com/ClickHouse/ClickHouse/pull/14440) ([Matwey V. Kornilov](https://github.com/matwey)). +* Prepare for build with clang 11. [#14455](https://github.com/ClickHouse/ClickHouse/pull/14455) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lower binary size in debug build by removing debug info from `Functions`. This is needed only for one internal project in Yandex who is using very old linker. [#14549](https://github.com/ClickHouse/ClickHouse/pull/14549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable ccache by default in cmake if it's found in OS. 
[#14575](https://github.com/ClickHouse/ClickHouse/pull/14575) ([alesapin](https://github.com/alesapin)). + +#### Other +* Changelog for 20.7 [#13499](https://github.com/ClickHouse/ClickHouse/issues/13499). [#14420](https://github.com/ClickHouse/ClickHouse/pull/14420) ([Alexander Kazakov](https://github.com/Akazz)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Less number of threads in builder"'. [#14421](https://github.com/ClickHouse/ClickHouse/pull/14421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.9.2.20-stable.md b/docs/changelogs/v20.9.2.20-stable.md new file mode 100644 index 00000000000..6c869e47a8f --- /dev/null +++ b/docs/changelogs/v20.9.2.20-stable.md @@ -0,0 +1,75 @@ +### ClickHouse release v20.9.2.20-stable FIXME as compared to v20.8.1.4513-prestable + +#### New Feature +* Add table function `view` which turns a subquery into a table object. This helps passing queries around. For instance, it can be used in remote/cluster table functions. [#12567](https://github.com/ClickHouse/ClickHouse/pull/12567) ([Amos Bird](https://github.com/amosbird)). +* Now we can write `select * apply(length) apply(max) from wide_string_table` to find out the maximum length of all string columns. And the following two variants are provided too:. [#14233](https://github.com/ClickHouse/ClickHouse/pull/14233) ([Amos Bird](https://github.com/amosbird)). +* Added an aggregate function RankCorrelationSpearman which simply computes a rank correlation coefficient. Continuation of [#11769](https://github.com/ClickHouse/ClickHouse/issues/11769). [#14411](https://github.com/ClickHouse/ClickHouse/pull/14411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added database generation by query util. Continuation of [#10973](https://github.com/ClickHouse/ClickHouse/issues/10973). [#14442](https://github.com/ClickHouse/ClickHouse/pull/14442) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+ +#### Performance Improvement +* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under optimize_skip_unused_shards and optimize_distributed_group_by_sharding_key). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)). + +#### Improvement +* Improvements in StorageRabbitMQ: Added connection and channels failure handling, proper commits, insert failures handling, better exchanges, queue durability and queue resume opportunity, new queue settings. Fixed tests. [#12761](https://github.com/ClickHouse/ClickHouse/pull/12761) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added Redis requirepass authorization. [#13688](https://github.com/ClickHouse/ClickHouse/pull/13688) ([Ivan Torgashov](https://github.com/it1804)). +* Add precision argument for DateTime type. [#13761](https://github.com/ClickHouse/ClickHouse/pull/13761) ([Winter Zhang](https://github.com/zhang2014)). +* Improve the Kafka engine performance by providing independent thread for each consumer. Separate thread pool for streaming engines (like Kafka). [#13939](https://github.com/ClickHouse/ClickHouse/pull/13939) ([fastio](https://github.com/fastio)). +* Add default compression codec for parts in `system.part_log` with name `default_compression_codec`. [#14116](https://github.com/ClickHouse/ClickHouse/pull/14116) ([alesapin](https://github.com/alesapin)). +* Replace wide integers from boost multiprecision with implementation from https://github.com/cerevra/int. [#14229](https://github.com/ClickHouse/ClickHouse/pull/14229) ([Artem Zuikov](https://github.com/4ertus2)). +* Implicitly convert primary key to not null in MaterializeMySQL(Same as MySQL). Fixes [#14114](https://github.com/ClickHouse/ClickHouse/issues/14114). [#14397](https://github.com/ClickHouse/ClickHouse/pull/14397) ([Winter Zhang](https://github.com/zhang2014)). 
+* Added new setting system_events_show_zero_values as proposed in [#11384](https://github.com/ClickHouse/ClickHouse/issues/11384). [#14404](https://github.com/ClickHouse/ClickHouse/pull/14404) ([Dmitry Rubashkin](https://github.com/dimarub2000)). +* Now obfuscator supports UUID type as proposed in [#13163](https://github.com/ClickHouse/ClickHouse/issues/13163). [#14409](https://github.com/ClickHouse/ClickHouse/pull/14409) ([Dmitry Rubashkin](https://github.com/dimarub2000)). +* Creating sets for multiple `JOIN` and `IN` in parallel. It may slightly improve performance for queries with several different `IN subquery` expressions. [#14412](https://github.com/ClickHouse/ClickHouse/pull/14412) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now TTLs will be applied during merge if they were not previously materialized. [#14438](https://github.com/ClickHouse/ClickHouse/pull/14438) ([alesapin](https://github.com/alesapin)). +* MySQL handler returns `OK` for queries like `SET @@var = value`. Such statement is ignored. It is needed because some MySQL drivers send `SET @@` query for setup after handshake [#9336](https://github.com/ClickHouse/ClickHouse/issues/9336)#issuecomment-686222422 . [#14469](https://github.com/ClickHouse/ClickHouse/pull/14469) ([BohuTANG](https://github.com/BohuTANG)). +* Disallow empty time_zone argument in `toStartOf*` type of functions. [#14509](https://github.com/ClickHouse/ClickHouse/pull/14509) ([Bharat Nallan](https://github.com/bharatnc)). +* ... [#14523](https://github.com/ClickHouse/ClickHouse/pull/14523) ([BohuTANG](https://github.com/BohuTANG)). +* Backported in [#14873](https://github.com/ClickHouse/ClickHouse/issues/14873): Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). + +#### Bug Fix +* Fix arrayJoin() capturing in lambda (LOGICAL_ERROR). 
[#13792](https://github.com/ClickHouse/ClickHouse/pull/13792) ([Azat Khuzhin](https://github.com/azat)). +* Fix GRANT ALL statement when executed on a non-global level. [#13987](https://github.com/ClickHouse/ClickHouse/pull/13987) ([Vitaly Baranov](https://github.com/vitlibar)). +* Disallows `CODEC` on `ALIAS` column type. Fixes [#13911](https://github.com/ClickHouse/ClickHouse/issues/13911). [#14263](https://github.com/ClickHouse/ClickHouse/pull/14263) ([Bharat Nallan](https://github.com/bharatnc)). +* Better check for tuple size in SSD cache complex key external dictionaries. This fixes [#13981](https://github.com/ClickHouse/ClickHouse/issues/13981). [#14313](https://github.com/ClickHouse/ClickHouse/pull/14313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix QueryPlan lifetime (for EXPLAIN PIPELINE graph=1) for queries with nested interpreter. [#14315](https://github.com/ClickHouse/ClickHouse/pull/14315) ([Azat Khuzhin](https://github.com/azat)). +* Fix exception during ALTER LIVE VIEW query with REFRESH command. [#14320](https://github.com/ClickHouse/ClickHouse/pull/14320) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)). +* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)). +* Proxy restart/start/stop/reload of SysVinit to systemd (if it is used). [#14460](https://github.com/ClickHouse/ClickHouse/pull/14460) ([Azat Khuzhin](https://github.com/azat)). +* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15020](https://github.com/ClickHouse/ClickHouse/issues/15020): Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14599](https://github.com/ClickHouse/ClickHouse/issues/14599): Fix rare segfaults in functions with combinator -Resample, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#14729](https://github.com/ClickHouse/ClickHouse/issues/14729): Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#14654](https://github.com/ClickHouse/ClickHouse/issues/14654): Added the checker as neither calling `lc->isNullable()` nor calling `ls->getDictionaryPtr()->isNullable()` would return the correct result. 
[#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#14663](https://github.com/ClickHouse/ClickHouse/issues/14663): Fix wrong Decimal multiplication result caused wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#14662](https://github.com/ClickHouse/ClickHouse/issues/14662): Stuff the query into ASTFunction's argument list so that we don't break the presumptions of some AST visitors. This fixes [#14608](https://github.com/ClickHouse/ClickHouse/issues/14608). [#14611](https://github.com/ClickHouse/ClickHouse/pull/14611) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#14792](https://github.com/ClickHouse/ClickHouse/issues/14792): Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Backported in [#14721](https://github.com/ClickHouse/ClickHouse/issues/14721): Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#14770](https://github.com/ClickHouse/ClickHouse/issues/14770): Replace column transformer should replace identifiers with cloned ASTs. This fixes [#14695](https://github.com/ClickHouse/ClickHouse/issues/14695) . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)). 
+* Backported in [#14825](https://github.com/ClickHouse/ClickHouse/issues/14825): Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#14912](https://github.com/ClickHouse/ClickHouse/issues/14912): Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#14944](https://github.com/ClickHouse/ClickHouse/issues/14944): Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#14956](https://github.com/ClickHouse/ClickHouse/issues/14956): Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#14989](https://github.com/ClickHouse/ClickHouse/issues/14989): Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#15055](https://github.com/ClickHouse/ClickHouse/issues/15055): Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. 
[#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)). +* Backported in [#15079](https://github.com/ClickHouse/ClickHouse/issues/15079): Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15059](https://github.com/ClickHouse/ClickHouse/issues/15059): If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15052](https://github.com/ClickHouse/ClickHouse/issues/15052): We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Integration tests use default base config. All config changes are explicit with main_configs, user_configs and dictionaries parameters for instance. [#13647](https://github.com/ClickHouse/ClickHouse/pull/13647) ([Ilya Yatsishin](https://github.com/qoega)). +* ... [#14368](https://github.com/ClickHouse/ClickHouse/pull/14368) ([BohuTANG](https://github.com/BohuTANG)). +* Fix the logic in backport script. In previous versions it was triggered for any labels of 100% red color. It was strange. [#14433](https://github.com/ClickHouse/ClickHouse/pull/14433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix missed `#include `. [#14440](https://github.com/ClickHouse/ClickHouse/pull/14440) ([Matwey V. Kornilov](https://github.com/matwey)). +* Prepare for build with clang 11. [#14455](https://github.com/ClickHouse/ClickHouse/pull/14455) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lower binary size in debug build by removing debug info from `Functions`. This is needed only for one internal project in Yandex who is using very old linker. [#14549](https://github.com/ClickHouse/ClickHouse/pull/14549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Other +* Changelog for 20.7 [#13499](https://github.com/ClickHouse/ClickHouse/issues/13499). [#14420](https://github.com/ClickHouse/ClickHouse/pull/14420) ([Alexander Kazakov](https://github.com/Akazz)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Less number of threads in builder"'. [#14421](https://github.com/ClickHouse/ClickHouse/pull/14421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.9.3.45-stable.md b/docs/changelogs/v20.9.3.45-stable.md new file mode 100644 index 00000000000..49cebd73525 --- /dev/null +++ b/docs/changelogs/v20.9.3.45-stable.md @@ -0,0 +1,33 @@ +### ClickHouse release v20.9.3.45-stable FIXME as compared to v20.9.2.20-stable + +#### Improvement +* Backported in [#15568](https://github.com/ClickHouse/ClickHouse/issues/15568): Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix +* Backported in [#15150](https://github.com/ClickHouse/ClickHouse/issues/15150): Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). 
[#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Backported in [#15250](https://github.com/ClickHouse/ClickHouse/issues/15250): Fixed segfault in CacheDictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#14971](https://github.com/ClickHouse/ClickHouse/issues/14971): Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Backported in [#15104](https://github.com/ClickHouse/ClickHouse/issues/15104): Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15141](https://github.com/ClickHouse/ClickHouse/issues/15141): Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15209](https://github.com/ClickHouse/ClickHouse/issues/15209): Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)). +* Backported in [#15224](https://github.com/ClickHouse/ClickHouse/issues/15224): Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). 
[#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Backported in [#15403](https://github.com/ClickHouse/ClickHouse/issues/15403): Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#15487](https://github.com/ClickHouse/ClickHouse/issues/15487): Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15282](https://github.com/ClickHouse/ClickHouse/issues/15282): Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15401](https://github.com/ClickHouse/ClickHouse/issues/15401): Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hang forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15340](https://github.com/ClickHouse/ClickHouse/issues/15340): Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Backported in [#15336](https://github.com/ClickHouse/ClickHouse/issues/15336): Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#15588](https://github.com/ClickHouse/ClickHouse/issues/15588): Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)). +* Backported in [#15446](https://github.com/ClickHouse/ClickHouse/issues/15446): Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Backported in [#15507](https://github.com/ClickHouse/ClickHouse/issues/15507): Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Backported in [#15549](https://github.com/ClickHouse/ClickHouse/issues/15549): Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Backported in [#15560](https://github.com/ClickHouse/ClickHouse/issues/15560): Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). +* Backported in [#15725](https://github.com/ClickHouse/ClickHouse/issues/15725): Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#15655](https://github.com/ClickHouse/ClickHouse/issues/15655): Fix 'Database doesn't exist.' in queries with IN and Distributed table when there's no database on initiator. [#15538](https://github.com/ClickHouse/ClickHouse/pull/15538) ([Artem Zuikov](https://github.com/4ertus2)). +* Backported in [#15631](https://github.com/ClickHouse/ClickHouse/issues/15631): Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#15583](https://github.com/ClickHouse/ClickHouse/issues/15583): Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#15665](https://github.com/ClickHouse/ClickHouse/issues/15665): Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15712](https://github.com/ClickHouse/ClickHouse/issues/15712): Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#15696](https://github.com/ClickHouse/ClickHouse/issues/15696): Fix race condition in AMQP-CPP. [#15667](https://github.com/ClickHouse/ClickHouse/pull/15667) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#15739](https://github.com/ClickHouse/ClickHouse/issues/15739): Fix error `Cannot find column` which may happen at insertion into `MATERIALIZED VIEW` in case if query for `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v20.9.4.76-stable.md b/docs/changelogs/v20.9.4.76-stable.md new file mode 100644 index 00000000000..f1ec2252bc2 --- /dev/null +++ b/docs/changelogs/v20.9.4.76-stable.md @@ -0,0 +1,37 @@ +### ClickHouse release v20.9.4.76-stable FIXME as compared to v20.9.3.45-stable + +#### Improvement +* Backported in [#16145](https://github.com/ClickHouse/ClickHouse/issues/16145): Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). +* Backported in [#16312](https://github.com/ClickHouse/ClickHouse/issues/16312): Add allow_nondeterministic_optimize_skip_unused_shards (to allow non deterministic like rand() or dictGet() in sharding key). [#16105](https://github.com/ClickHouse/ClickHouse/pull/16105) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Backported in [#15618](https://github.com/ClickHouse/ClickHouse/issues/15618): Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)). +* Backported in [#16201](https://github.com/ClickHouse/ClickHouse/issues/16201): Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
+* Backported in [#16229](https://github.com/ClickHouse/ClickHouse/issues/16229): Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Backported in [#15870](https://github.com/ClickHouse/ClickHouse/issues/15870): Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#15774](https://github.com/ClickHouse/ClickHouse/issues/15774): Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#15798](https://github.com/ClickHouse/ClickHouse/issues/15798): Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#15868](https://github.com/ClickHouse/ClickHouse/issues/15868): Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#15901](https://github.com/ClickHouse/ClickHouse/issues/15901): Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Backported in [#15924](https://github.com/ClickHouse/ClickHouse/issues/15924): Now exception will be thrown when `ALTER MODIFY COLUMN ... DEFAULT ...` has incompatible default with column type. Fixes [#15854](https://github.com/ClickHouse/ClickHouse/issues/15854). [#15858](https://github.com/ClickHouse/ClickHouse/pull/15858) ([alesapin](https://github.com/alesapin)). +* Backported in [#15919](https://github.com/ClickHouse/ClickHouse/issues/15919): Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#16167](https://github.com/ClickHouse/ClickHouse/issues/16167): Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#16356](https://github.com/ClickHouse/ClickHouse/issues/16356): Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Backported in [#15971](https://github.com/ClickHouse/ClickHouse/issues/15971): Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16022](https://github.com/ClickHouse/ClickHouse/issues/16022): Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#16218](https://github.com/ClickHouse/ClickHouse/issues/16218): Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#16026](https://github.com/ClickHouse/ClickHouse/issues/16026): Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Backported in [#16091](https://github.com/ClickHouse/ClickHouse/issues/16091): Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16361](https://github.com/ClickHouse/ClickHouse/issues/16361): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16140](https://github.com/ClickHouse/ClickHouse/issues/16140): Fix `ALTER MODIFY ... 
ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Backported in [#16079](https://github.com/ClickHouse/ClickHouse/issues/16079): Fixes [#15780](https://github.com/ClickHouse/ClickHouse/issues/15780) regression, e.g. indexOf([1, 2, 3], toLowCardinality(1)) now is prohibited but it should not be. [#16038](https://github.com/ClickHouse/ClickHouse/pull/16038) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#16298](https://github.com/ClickHouse/ClickHouse/issues/16298): Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16232](https://github.com/ClickHouse/ClickHouse/issues/16232): Fix the case when memory can be overallocated regardless to the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16326](https://github.com/ClickHouse/ClickHouse/issues/16326): Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16377](https://github.com/ClickHouse/ClickHouse/issues/16377): Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16432](https://github.com/ClickHouse/ClickHouse/issues/16432): Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys.
Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#16449](https://github.com/ClickHouse/ClickHouse/issues/16449): Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Other +* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16160](https://github.com/ClickHouse/ClickHouse/pull/16160) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v20.9.5.5-stable.md b/docs/changelogs/v20.9.5.5-stable.md new file mode 100644 index 00000000000..d200972e071 --- /dev/null +++ b/docs/changelogs/v20.9.5.5-stable.md @@ -0,0 +1,12 @@ +### ClickHouse release v20.9.5.5-stable FIXME as compared to v20.9.4.76-stable + +#### Bug Fix +* Backported in [#16493](https://github.com/ClickHouse/ClickHouse/issues/16493): Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#16817](https://github.com/ClickHouse/ClickHouse/issues/16817): Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Backported in [#16508](https://github.com/ClickHouse/ClickHouse/issues/16508): Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#16474](https://github.com/ClickHouse/ClickHouse/issues/16474): Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#16571](https://github.com/ClickHouse/ClickHouse/issues/16571): Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Backported in [#16554](https://github.com/ClickHouse/ClickHouse/issues/16554): Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike Kot](https://github.com/myrrc)). +* Backported in [#16748](https://github.com/ClickHouse/ClickHouse/issues/16748): Fixed [#16081](https://github.com/ClickHouse/ClickHouse/issues/16081). [#16613](https://github.com/ClickHouse/ClickHouse/pull/16613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#16894](https://github.com/ClickHouse/ClickHouse/issues/16894): Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. 
This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v20.9.6.14-stable.md b/docs/changelogs/v20.9.6.14-stable.md new file mode 100644 index 00000000000..05fdfcc30aa --- /dev/null +++ b/docs/changelogs/v20.9.6.14-stable.md @@ -0,0 +1,19 @@ +### ClickHouse release v20.9.6.14-stable FIXME as compared to v20.9.5.5-stable + +#### Improvement +* Backported in [#17030](https://github.com/ClickHouse/ClickHouse/issues/17030): Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix +* Backported in [#15678](https://github.com/ClickHouse/ClickHouse/issues/15678): Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17105](https://github.com/ClickHouse/ClickHouse/issues/17105): fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#16759](https://github.com/ClickHouse/ClickHouse/issues/16759): This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#16740](https://github.com/ClickHouse/ClickHouse/issues/16740): Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17023](https://github.com/ClickHouse/ClickHouse/issues/17023): Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#16879](https://github.com/ClickHouse/ClickHouse/issues/16879): Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Backported in [#16945](https://github.com/ClickHouse/ClickHouse/issues/16945): Prevent clickhouse server crashes when using TimeSeriesGroupSum. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)). +* Backported in [#17078](https://github.com/ClickHouse/ClickHouse/issues/17078): Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17009](https://github.com/ClickHouse/ClickHouse/issues/17009): Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Backported in [#17013](https://github.com/ClickHouse/ClickHouse/issues/17013): Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... 
NewType` when `SELECT` has a `WHERE` expression on the column being altered and the alter hasn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17037](https://github.com/ClickHouse/ClickHouse/issues/17037): Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Backported in [#17172](https://github.com/ClickHouse/ClickHouse/issues/17172): Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v20.9.7.11-stable.md b/docs/changelogs/v20.9.7.11-stable.md new file mode 100644 index 00000000000..e5b35a3eb32 --- /dev/null +++ b/docs/changelogs/v20.9.7.11-stable.md @@ -0,0 +1,27 @@ +### ClickHouse release v20.9.7.11-stable FIXME as compared to v20.9.6.14-stable + +#### Performance Improvement +* Backported in [#17590](https://github.com/ClickHouse/ClickHouse/issues/17590): Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#17316](https://github.com/ClickHouse/ClickHouse/issues/17316): Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). +* Backported in [#17343](https://github.com/ClickHouse/ClickHouse/issues/17343): TODO.
[#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17196](https://github.com/ClickHouse/ClickHouse/issues/17196): Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#17432](https://github.com/ClickHouse/ClickHouse/issues/17432): Bug fix for function fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). +* Backported in [#17129](https://github.com/ClickHouse/ClickHouse/issues/17129): Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17195](https://github.com/ClickHouse/ClickHouse/issues/17195): Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17402](https://github.com/ClickHouse/ClickHouse/issues/17402): Fix [#15235](https://github.com/ClickHouse/ClickHouse/issues/15235). When clickhouse-copier handle non-partitioned table, throws segfault error. [#17248](https://github.com/ClickHouse/ClickHouse/pull/17248) ([Qi Chen](https://github.com/kaka11chen)). +* Backported in [#17409](https://github.com/ClickHouse/ClickHouse/issues/17409): Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) .
[#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#17488](https://github.com/ClickHouse/ClickHouse/issues/17488): Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17492](https://github.com/ClickHouse/ClickHouse/issues/17492): Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#17522](https://github.com/ClickHouse/ClickHouse/issues/17522): Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#17535](https://github.com/ClickHouse/ClickHouse/issues/17535): Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Backported in [#17627](https://github.com/ClickHouse/ClickHouse/issues/17627): Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#17608](https://github.com/ClickHouse/ClickHouse/issues/17608): When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#17729](https://github.com/ClickHouse/ClickHouse/issues/17729): Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#17782](https://github.com/ClickHouse/ClickHouse/issues/17782): Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Backported in [#17814](https://github.com/ClickHouse/ClickHouse/issues/17814): Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). + +#### Build/Testing/Packaging Improvement +* Backported in [#17288](https://github.com/ClickHouse/ClickHouse/issues/17288): Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). + From 0036b1a6eed3360a145b89be52372668d99255fa Mon Sep 17 00:00:00 2001 From: guykohen Date: Tue, 24 May 2022 18:24:54 -0400 Subject: [PATCH 500/615] Remove height restrictions from the query div in play web tool, and make sure width of the query box won't shrink below 100%.
--- programs/server/play.html | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 6b530790ad0..69ef616db09 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -148,13 +148,15 @@ #query_div { - height: 100%; + /* Make enough space for medium/large queries but allowing query textarea to grow. */ + min-height: 20%; + display: grid; } #query { - height: 100%; - width: 100%; + /* Keeps query text-area's width full screen even when user adjusting the width of the query box. */ + min-width: 100%; } #inputs From ef187fca5481e688c4d9cef138e37365f4935232 Mon Sep 17 00:00:00 2001 From: guykohen Date: Tue, 24 May 2022 20:10:02 -0400 Subject: [PATCH 501/615] Remove height restrictions from the query div in play web tool, and make sure width of the query box won't shrink below 100%. Default height is set to 20% which should be big enough for medium queries. --- programs/server/play.html | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 69ef616db09..31284f8feb5 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -101,14 +101,9 @@ #controls { - /* Make enough space for even huge queries. */ - height: 20%; /* When a page will be scrolled horizontally due to large table size, keep controls in place. */ position: sticky; left: 0; - /* This allows query textarea to occupy the remaining height while other elements have fixed height. */ - display: flex; - flex-direction: column; } /* Otherwise Webkit based browsers will display ugly border on focus. */ @@ -146,15 +141,10 @@ background-color: var(--element-background-color); } - #query_div - { - /* Make enough space for medium/large queries but allowing query textarea to grow. */ - min-height: 20%; - display: grid; - } - #query { + /* Make enough space for even big queries. 
*/ + height: calc(20vh); /* Keeps query text-area's width full screen even when user adjusting the width of the query box. */ min-width: 100%; } From 938e766d7ee58e0e9943042462c7a6663f2526be Mon Sep 17 00:00:00 2001 From: guykohen Date: Tue, 24 May 2022 20:10:02 -0400 Subject: [PATCH 502/615] Remove height restrictions from the query div in play web tool, and make sure width of the query box won't shrink below 100%. Default height is set to 20% which should be big enough for medium queries. --- programs/server/play.html | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 69ef616db09..5d0482c8169 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -101,14 +101,9 @@ #controls { - /* Make enough space for even huge queries. */ - height: 20%; /* When a page will be scrolled horizontally due to large table size, keep controls in place. */ position: sticky; left: 0; - /* This allows query textarea to occupy the remaining height while other elements have fixed height. */ - display: flex; - flex-direction: column; } /* Otherwise Webkit based browsers will display ugly border on focus. */ @@ -146,15 +141,10 @@ background-color: var(--element-background-color); } - #query_div - { - /* Make enough space for medium/large queries but allowing query textarea to grow. */ - min-height: 20%; - display: grid; - } - #query { + /* Make enough space for even big queries. */ + height: 20vh; /* Keeps query text-area's width full screen even when user adjusting the width of the query box. 
*/ min-width: 100%; } From 57cfc0bd0443fee081172743abbd6b84b66f5e82 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Wed, 25 May 2022 06:17:15 +0530 Subject: [PATCH 503/615] check for validity of h3 index --- src/Functions/h3GetUnidirectionalEdge.cpp | 6 ++++++ tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index 978cb3d8d65..7d4122150e6 100644 --- a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -92,6 +92,12 @@ public: { const UInt64 origin = data_hindex_origin[row]; const UInt64 dest = data_hindex_dest[row]; + + if (!isValidCell(origin)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid origin H3 index: {}", origin); + if (!isValidCell(dest)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid dest H3 index: {}", dest); + UInt64 res = getUnidirectionalEdge(origin, dest); dst_data[row] = res; } diff --git a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql index 05b391241e0..551f59fd2f3 100644 --- a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql +++ b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql @@ -21,3 +21,5 @@ select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('852834 SELECT h3UnidirectionalEdgeIsValid(1248204388774707199) as edge; SELECT h3UnidirectionalEdgeIsValid(1248204388774707197) as edge; + +SELECT h3GetUnidirectionalEdge(stringToH3('85283473ffffff'), stringToH3('852\03477fffffff')), NULL, NULL; -- { serverError 43 } From c586c91c09d7e331cc38c9990e79c94dae6f61f2 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Wed, 25 May 2022 07:16:45 +0530 Subject: [PATCH 504/615] add more tests --- .../0_stateless/02292_h3_unidirectional_funcs.reference | 6 ++++++ 
.../0_stateless/02292_h3_unidirectional_funcs.sql | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference index b0928acc80e..219d9888943 100644 --- a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference +++ b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.reference @@ -1,15 +1,21 @@ 599686043507097597 0 +0 (599686042433355775,599686043507097599) (0,0) +(0,0) 599686042433355775 599686042433355773 0 +0 [(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044)] [] +[] [1248204388774707199,1320261982812635135,1392319576850563071,1464377170888491007,1536434764926418943,1608492358964346879] [1248204388774707197,1320261982812635133,1392319576850563069,1464377170888491005,1536434764926418941,1608492358964346877] +[1262459476296859647,1334517070334787583,1406574664372715519,1478632258410643455,1550689852448571391,1622747446486499327] 1248204388774707199 0 1 0 +0 diff --git a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql index 551f59fd2f3..4082671356e 100644 --- a/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql +++ b/tests/queries/0_stateless/02292_h3_unidirectional_funcs.sql @@ -2,24 +2,29 @@ SELECT h3GetDestinationIndexFromUnidirectionalEdge(1248204388774707197); SELECT h3GetDestinationIndexFromUnidirectionalEdge(599686042433355773); +SELECT h3GetDestinationIndexFromUnidirectionalEdge(stringToH3('85283473ffffff')); SELECT h3GetIndexesFromUnidirectionalEdge(1248204388774707199); SELECT h3GetIndexesFromUnidirectionalEdge(599686042433355775); +SELECT h3GetIndexesFromUnidirectionalEdge(stringToH3('85283473ffffff')); SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707199); SELECT h3GetOriginIndexFromUnidirectionalEdge(1248204388774707197); SELECT 
h3GetOriginIndexFromUnidirectionalEdge(599686042433355775); +SELECT h3GetOriginIndexFromUnidirectionalEdge(stringToH3('85283473ffffff')); SELECT h3GetUnidirectionalEdgeBoundary(1248204388774707199); SELECT h3GetUnidirectionalEdgeBoundary(599686042433355773); +SELECT h3GetUnidirectionalEdgeBoundary(stringToH3('85283473ffffff')); SELECT h3GetUnidirectionalEdgesFromHexagon(1248204388774707199); SELECT h3GetUnidirectionalEdgesFromHexagon(599686042433355773); +SELECT h3GetUnidirectionalEdgesFromHexagon(stringToH3('85283473ffffff')); select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283477fffffff')); select h3GetUnidirectionalEdge(stringToH3('85283473fffffff'), stringToH3('85283473fffffff')); +SELECT h3GetUnidirectionalEdge(stringToH3('85283473ffffff'), stringToH3('852\03477fffffff')); -- { serverError 43 } SELECT h3UnidirectionalEdgeIsValid(1248204388774707199) as edge; SELECT h3UnidirectionalEdgeIsValid(1248204388774707197) as edge; - -SELECT h3GetUnidirectionalEdge(stringToH3('85283473ffffff'), stringToH3('852\03477fffffff')), NULL, NULL; -- { serverError 43 } +SELECT h3UnidirectionalEdgeIsValid(stringToH3('85283473ffffff')) as edge; From 4556904dae7ca1e3cfc0079b0dd8939ab36d51f1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 25 May 2022 04:21:25 +0200 Subject: [PATCH 505/615] Fix stress test --- programs/su/su.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 7a108f3baef..9aa41085094 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -64,7 +64,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) gid = entry.gr_gid; } - if (gid == 0) + if (gid == 0 && getgid() != 0) throw Exception("Group has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); if (0 != setgid(gid)) @@ -89,7 +89,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) uid = entry.pw_uid; } - if (uid == 0) + if (uid == 0 && 
getuid() != 0) throw Exception("User has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS); if (0 != setuid(uid)) From 0b5cfa4094516cc80747f57f6e49ceabfca13a60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 25 May 2022 04:24:02 +0200 Subject: [PATCH 506/615] Fix split build --- programs/su/CMakeLists.txt | 2 +- programs/su/{su.cpp => clickhouse-su.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename programs/su/{su.cpp => clickhouse-su.cpp} (100%) diff --git a/programs/su/CMakeLists.txt b/programs/su/CMakeLists.txt index 1187deeeea7..df207e16f6e 100644 --- a/programs/su/CMakeLists.txt +++ b/programs/su/CMakeLists.txt @@ -1,3 +1,3 @@ -set (CLICKHOUSE_SU_SOURCES su.cpp) +set (CLICKHOUSE_SU_SOURCES clickhouse-su.cpp) set (CLICKHOUSE_SU_LINK PRIVATE dbms) clickhouse_program_add(su) diff --git a/programs/su/su.cpp b/programs/su/clickhouse-su.cpp similarity index 100% rename from programs/su/su.cpp rename to programs/su/clickhouse-su.cpp From b044d44fef1a97fa8b795821a201a41dba3a9d3a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 14:03:14 +0200 Subject: [PATCH 507/615] Refactoring: Make template instantiation easier to read - introduced class MatchTraits with enums that replace bool template parameters - (minor: made negation the last template parameters because negation executes last during evaluation) --- src/Functions/MatchImpl.h | 55 ++++++++++++++++++++++--------- src/Functions/MultiMatchAnyImpl.h | 2 +- src/Functions/ilike.cpp | 2 +- src/Functions/like.h | 2 +- src/Functions/match.cpp | 2 +- src/Functions/notILike.cpp | 2 +- src/Functions/notLike.cpp | 2 +- 7 files changed, 45 insertions(+), 22 deletions(-) diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 54aaa3116fd..e1c6b95d357 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -63,14 +63,33 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res) } -/** 'like' - if 
true, treat pattern as SQL LIKE, otherwise as re2 regexp. - * 'negate' - if true, negate result - * 'case_insensitive' - if true, match case insensitively - * - * NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') - * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. - */ -template +// For more readable instantiations of MatchImpl<> +struct MatchTraits +{ +enum class Syntax +{ + Like, + Re2 +}; + +enum class Case +{ + Sensitive, + Insensitive +}; + +enum class Result +{ + DontNegate, + Negate +}; +}; + +/** + * NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') + * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. + */ +template struct MatchImpl { static constexpr bool use_default_implementation_for_constants = true; @@ -81,6 +100,10 @@ struct MatchImpl using ResultType = UInt8; + static constexpr bool is_like = (syntax_ == MatchTraits::Syntax::Like); + static constexpr bool case_insensitive = (case_ == MatchTraits::Case::Insensitive); + static constexpr bool negate = (result_ == MatchTraits::Result::Negate); + using Searcher = std::conditional_t; @@ -101,7 +124,7 @@ struct MatchImpl /// A simple case where the [I]LIKE expression reduces to finding a substring in a string String strstr_pattern; - if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) + if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern)) { const UInt8 * const begin = haystack_data.data(); const UInt8 * const end = haystack_data.data() + haystack_data.size(); @@ -139,7 +162,7 @@ struct MatchImpl } else { - auto regexp = Regexps::get(needle); + auto regexp = Regexps::get(needle); String required_substring; bool is_trivial; @@ -252,7 +275,7 @@ struct MatchImpl /// A simple case where the LIKE expression reduces to finding a substring in a string String strstr_pattern; - if 
(like && impl::likePatternIsSubstring(needle, strstr_pattern)) + if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern)) { const UInt8 * const begin = haystack.data(); const UInt8 * const end = haystack.data() + haystack.size(); @@ -295,7 +318,7 @@ struct MatchImpl } else { - auto regexp = Regexps::get(needle); + auto regexp = Regexps::get(needle); String required_substring; bool is_trivial; @@ -440,7 +463,7 @@ struct MatchImpl reinterpret_cast(cur_needle_data), cur_needle_length); - if (like && impl::likePatternIsSubstring(needle, required_substr)) + if (is_like && impl::likePatternIsSubstring(needle, required_substr)) { if (required_substr.size() > cur_haystack_length) res[i] = negate; @@ -457,7 +480,7 @@ struct MatchImpl // each row is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly const int flags = Regexps::buildRe2Flags(); - const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); + const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); @@ -557,7 +580,7 @@ struct MatchImpl reinterpret_cast(cur_needle_data), cur_needle_length); - if (like && impl::likePatternIsSubstring(needle, required_substr)) + if (is_like && impl::likePatternIsSubstring(needle, required_substr)) { if (required_substr.size() > cur_haystack_length) res[i] = negate; @@ -574,7 +597,7 @@ struct MatchImpl // each row is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly const int flags = Regexps::buildRe2Flags(); - const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); + const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); diff --git a/src/Functions/MultiMatchAnyImpl.h 
b/src/Functions/MultiMatchAnyImpl.h index 595a3c8de5b..8a65c8cb2b4 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -120,7 +120,7 @@ struct MultiMatchAnyImpl memset(accum.data(), 0, accum.size()); for (size_t j = 0; j < needles.size(); ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp index 1222cc48d07..b88d01986d5 100644 --- a/src/Functions/ilike.cpp +++ b/src/Functions/ilike.cpp @@ -12,7 +12,7 @@ struct NameILike static constexpr auto name = "ilike"; }; -using ILikeImpl = MatchImpl; +using ILikeImpl = MatchImpl; using FunctionILike = FunctionsStringSearch; } diff --git a/src/Functions/like.h b/src/Functions/like.h index edb738d393b..9e25fc6f4c0 100644 --- a/src/Functions/like.h +++ b/src/Functions/like.h @@ -11,7 +11,7 @@ struct NameLike static constexpr auto name = "like"; }; -using LikeImpl = MatchImpl; +using LikeImpl = MatchImpl; using FunctionLike = FunctionsStringSearch; } diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index 4c329701464..a0789f229fd 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -13,7 +13,7 @@ struct NameMatch static constexpr auto name = "match"; }; -using FunctionMatch = FunctionsStringSearch>; +using FunctionMatch = FunctionsStringSearch>; } diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp index b5e06ac55f4..5e78db1c518 100644 --- a/src/Functions/notILike.cpp +++ b/src/Functions/notILike.cpp @@ -12,7 +12,7 @@ struct NameNotILike static constexpr auto name = "notILike"; }; -using NotILikeImpl = MatchImpl; +using NotILikeImpl = MatchImpl; using FunctionNotILike = FunctionsStringSearch; } diff --git a/src/Functions/notLike.cpp b/src/Functions/notLike.cpp index 
7fa1b6f9122..33a36748bb1 100644 --- a/src/Functions/notLike.cpp +++ b/src/Functions/notLike.cpp @@ -12,7 +12,7 @@ struct NameNotLike static constexpr auto name = "notLike"; }; -using FunctionNotLike = FunctionsStringSearch>; +using FunctionNotLike = FunctionsStringSearch>; } From 35bef17302761a287c7b9ec14c441821da710894 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 20:55:04 +0200 Subject: [PATCH 508/615] Introduce variables to hold the match result --> nicer when debugging --- src/Functions/MatchImpl.h | 122 +++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 62 deletions(-) diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index e1c6b95d357..eec6bdaa329 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -184,14 +184,14 @@ struct MatchImpl size_t prev_offset = 0; for (size_t i = 0; i < haystack_size; ++i) { - res[i] = negate - ^ regexp->getRE2()->Match( - {reinterpret_cast(&haystack_data[prev_offset]), haystack_offsets[i] - prev_offset - 1}, - 0, - haystack_offsets[i] - prev_offset - 1, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match = regexp->getRE2()->Match( + {reinterpret_cast(&haystack_data[prev_offset]), haystack_offsets[i] - prev_offset - 1}, + 0, + haystack_offsets[i] - prev_offset - 1, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match; prev_offset = haystack_offsets[i]; } @@ -239,14 +239,14 @@ struct MatchImpl const size_t start_pos = (required_substring_is_prefix) ? 
(reinterpret_cast(pos) - str_data) : 0; const size_t end_pos = str_size; - res[i] = negate - ^ regexp->getRE2()->Match( - {str_data, str_size}, - start_pos, - end_pos, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match = regexp->getRE2()->Match( + {str_data, str_size}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match; } } else @@ -340,14 +340,14 @@ struct MatchImpl size_t offset = 0; for (size_t i = 0; i < haystack_size; ++i) { - res[i] = negate - ^ regexp->getRE2()->Match( - {reinterpret_cast(&haystack[offset]), N}, - 0, - N, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match = regexp->getRE2()->Match( + {reinterpret_cast(&haystack[offset]), N}, + 0, + N, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match; offset += N; } @@ -398,14 +398,14 @@ struct MatchImpl const size_t start_pos = (required_substring_is_prefix) ? (reinterpret_cast(pos) - str_data) : 0; const size_t end_pos = N; - res[i] = negate - ^ regexp->getRE2()->Match( + const bool match = regexp->getRE2()->Match( {str_data, N}, start_pos, end_pos, re2_st::RE2::UNANCHORED, nullptr, 0); + res[i] = negate ^ match; } } else @@ -471,8 +471,7 @@ struct MatchImpl { Searcher searcher(required_substr.data(), required_substr.size(), cur_haystack_length); const auto * match = searcher.search(cur_haystack_data, cur_haystack_length); - res[i] = negate - ^ (match != cur_haystack_data + cur_haystack_length); + res[i] = negate ^ (match != cur_haystack_data + cur_haystack_length); } } else @@ -492,14 +491,14 @@ struct MatchImpl } else { - res[i] = negate - ^ regexp.getRE2()->Match( - {reinterpret_cast(cur_haystack_data), cur_haystack_length}, - 0, - cur_haystack_length, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match = regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + 0, + cur_haystack_length, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match; } } else @@ -522,14 
+521,14 @@ struct MatchImpl const size_t start_pos = (required_substring_is_prefix) ? (match - cur_haystack_data) : 0; const size_t end_pos = cur_haystack_length; - res[i] = negate - ^ regexp.getRE2()->Match( - {reinterpret_cast(cur_haystack_data), cur_haystack_length}, - start_pos, - end_pos, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match2 = regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match2; } } } @@ -588,8 +587,7 @@ struct MatchImpl { Searcher searcher(required_substr.data(), required_substr.size(), cur_haystack_length); const auto * match = searcher.search(cur_haystack_data, cur_haystack_length); - res[i] = negate - ^ (match != cur_haystack_data + cur_haystack_length); + res[i] = negate ^ (match != cur_haystack_data + cur_haystack_length); } } else @@ -609,14 +607,14 @@ struct MatchImpl } else { - res[i] = negate - ^ regexp.getRE2()->Match( - {reinterpret_cast(cur_haystack_data), cur_haystack_length}, - 0, - cur_haystack_length, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match = regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + 0, + cur_haystack_length, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match; } } else @@ -639,14 +637,14 @@ struct MatchImpl const size_t start_pos = (required_substring_is_prefix) ? 
(match - cur_haystack_data) : 0; const size_t end_pos = cur_haystack_length; - res[i] = negate - ^ regexp.getRE2()->Match( - {reinterpret_cast(cur_haystack_data), cur_haystack_length}, - start_pos, - end_pos, - re2_st::RE2::UNANCHORED, - nullptr, - 0); + const bool match2 = regexp.getRE2()->Match( + {reinterpret_cast(cur_haystack_data), cur_haystack_length}, + start_pos, + end_pos, + re2_st::RE2::UNANCHORED, + nullptr, + 0); + res[i] = negate ^ match2; } } } From 040fbf3686c45c8dd353bf3f45f58c7c93b5dfbf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 21:16:47 +0200 Subject: [PATCH 509/615] Tighter sanity checks in matching code --- src/Functions/MatchImpl.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index eec6bdaa329..003abe10a89 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -115,9 +115,13 @@ struct MatchImpl const ColumnPtr & start_pos_, PaddedPODArray & res) { + const size_t haystack_size = haystack_offsets.size(); + + if (haystack_size != res.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' unexpectedly received a different number of haystacks and results", name); + if (start_pos_ != nullptr) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function '{}' doesn't support start_pos argument", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); if (haystack_offsets.empty()) return; @@ -170,8 +174,6 @@ struct MatchImpl regexp->getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); - size_t haystack_size = haystack_offsets.size(); - if (required_substring.empty()) { if (!regexp->getRE2()) /// An empty regexp. Always matches. 
@@ -270,6 +272,11 @@ struct MatchImpl const String & needle, PaddedPODArray & res) { + const size_t haystack_size = haystack.size() / N; + + if (haystack_size != res.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' unexpectedly received a different number of haystacks and results", name); + if (haystack.empty()) return; @@ -326,8 +333,6 @@ struct MatchImpl regexp->getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); - const size_t haystack_size = haystack.size() / N; - if (required_substring.empty()) { if (!regexp->getRE2()) /// An empty regexp. Always matches. @@ -433,13 +438,11 @@ struct MatchImpl { const size_t haystack_size = haystack_offsets.size(); - if (haystack_size != needle_offset.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Function '{}' unexpectedly received a different number of haystacks and needles", name); + if (haystack_size != needle_offset.size() || haystack_size != res.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' unexpectedly received a different number of haystacks, needles and results", name); if (start_pos_ != nullptr) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function '{}' doesn't support start_pos argument", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); if (haystack_offsets.empty()) return; @@ -549,13 +552,11 @@ struct MatchImpl { const size_t haystack_size = haystack.size()/N; - if (haystack_size != needle_offset.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Function '{}' unexpectedly received a different number of haystacks and needles", name); + if (haystack_size != needle_offset.size() || haystack_size != res.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' unexpectedly received a different number of haystacks, needles and results", name); if (start_pos_ != nullptr) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function '{}' 
doesn't support start_pos argument", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' doesn't support start_pos argument", name); if (haystack.empty()) return; From e8c96777f6eb180fd220eff7afec3712eca1b3eb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 21:26:45 +0200 Subject: [PATCH 510/615] Make OptimizedRegularExpression::analyze() private --- src/Common/OptimizedRegularExpression.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index a3d38f27c07..bbb1b0d5eda 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -86,8 +86,6 @@ public: /// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log). const std::unique_ptr & getRE2() const { return re2; } - static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix); - void getAnalyzeResult(std::string & out_required_substring, bool & out_is_trivial, bool & out_required_substring_is_prefix) const { out_required_substring = required_substring; @@ -104,6 +102,8 @@ private: std::optional> case_insensitive_substring_searcher; std::unique_ptr re2; unsigned number_of_subpatterns; + + static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix); }; using OptimizedRegularExpression = OptimizedRegularExpressionImpl; From 01ab7b9bad008c8c0175327b0ac58aa719077018 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 21:29:43 +0200 Subject: [PATCH 511/615] Pass strings in some places as string_view The original goal was to get change const auto & needle = String( reinterpret_cast(cur_needle_data), cur_needle_length); in Functions/MatchImpl.h into a std::string_view to save an allocation + copy. 
The needle is eventually passed as search pattern into the re2 library. Re2 has an alternative constructor taking a const char * i.e. a NULL-terminated string. Here, the needle is NULL-terminated but 1. this is only because it is passed inside a ColumnString yet this is not always the case (e.g. fixed string columns have a dense layout w/o NULL terminator). 2. assuming NULL termination for users != MatchImpl of the regex code is too dangerous. So, for now we'll stay with copying to be on the safe side. One fine day when re2 has a ptr/size ctor, we can use std::string_view. Just changing a few other places from std::string to std::string_view but this will not help with performance. --- src/Common/OptimizedRegularExpression.cpp | 2 +- src/Common/OptimizedRegularExpression.h | 2 +- src/Functions/MatchImpl.h | 4 ++-- src/Functions/likePatternToRegexp.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index da348adbe31..cfc364929a3 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -17,7 +17,7 @@ namespace DB template void OptimizedRegularExpressionImpl::analyze( - const std::string & regexp, + std::string_view regexp, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix) diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index bbb1b0d5eda..eaa7b06e309 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -103,7 +103,7 @@ private: std::unique_ptr re2; unsigned number_of_subpatterns; - static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix); + static void analyze(std::string_view regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix); }; using
OptimizedRegularExpression = OptimizedRegularExpressionImpl; diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 003abe10a89..15cf032aedc 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -481,7 +481,7 @@ struct MatchImpl { // each row is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly - const int flags = Regexps::buildRe2Flags(); + const int flags = Regexps::buildRe2Flags(); const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); @@ -595,7 +595,7 @@ struct MatchImpl { // each row is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly - const int flags = Regexps::buildRe2Flags(); + const int flags = Regexps::buildRe2Flags(); const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); diff --git a/src/Functions/likePatternToRegexp.h b/src/Functions/likePatternToRegexp.h index 15e38e61ab4..319a3729e16 100644 --- a/src/Functions/likePatternToRegexp.h +++ b/src/Functions/likePatternToRegexp.h @@ -6,7 +6,7 @@ namespace DB { /// Transforms the [I]LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ -inline String likePatternToRegexp(const String & pattern) +inline String likePatternToRegexp(std::string_view pattern) { String res; res.reserve(pattern.size() * 2); From 05e4fa7df1b08d507dcc4dd4f07beaeaed98b7fb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 22:59:48 +0200 Subject: [PATCH 512/615] Fix special case of trivial regexp Previously, we would always set 1 in case of a trivial regex (which is correct). If someone in future builds a negated operator, then this will produce wrong results.
Right now, negation of regexp (SQL: NOT MATCH) is implemented at a higher level, so we are safe and this is more a preventive fix. --- src/Functions/MatchImpl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 15cf032aedc..17bda74f8ab 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -179,7 +179,7 @@ struct MatchImpl if (!regexp->getRE2()) /// An empty regexp. Always matches. { if (haystack_size) - memset(res.data(), 1, haystack_size * sizeof(res[0])); + memset(res.data(), !negate, haystack_size * sizeof(res[0])); } else { @@ -338,7 +338,7 @@ struct MatchImpl if (!regexp->getRE2()) /// An empty regexp. Always matches. { if (haystack_size) - memset(res.data(), 1, haystack_size * sizeof(res[0])); + memset(res.data(), !negate, haystack_size * sizeof(res[0])); } else { @@ -490,7 +490,7 @@ struct MatchImpl { if (!regexp.getRE2()) /// An empty regexp. Always matches. { - res[i] = 1; + res[i] = !negate; } else { @@ -604,7 +604,7 @@ struct MatchImpl { if (!regexp.getRE2()) /// An empty regexp. Always matches. 
{ - res[i] = 1; + res[i] = !negate; } else { From e3f76cab55d55330b2208545656e0780a19834f5 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 16:54:28 +0800 Subject: [PATCH 513/615] impl improve remote fs cache --- src/Common/FileCache.cpp | 54 +++++++++++++++++++++++++++----- src/Common/FileCache.h | 10 ++++++ src/Common/FileCacheSettings.cpp | 1 + src/Common/FileCacheSettings.h | 2 ++ src/Common/FileCache_fwd.h | 1 + src/Common/FileSegment.cpp | 9 ++++++ 6 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index ae1b1afdd09..4a7cea9f004 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -72,6 +72,8 @@ void IFileCache::assertInitialized() const LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_) : IFileCache(cache_base_path_, cache_settings_) + , max_stash_element_size(cache_settings_.max_elements) + , enable_cache_hits_threshold(cache_settings_.enable_cache_hits_threshold) , log(&Poco::Logger::get("LRUFileCache")) { } @@ -404,9 +406,46 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( "Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}", keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock)); - auto file_segment = std::make_shared(offset, size, key, this, state); - FileSegmentCell cell(std::move(file_segment), this, cache_lock); + auto skip_or_download = [&]() -> FileSegmentPtr + { + if (state == FileSegment::State::EMPTY) + { + LOG_TEST(log, "[addCell] FileSegment key:{}, offset:{}, state:{}, enable_cache_hits:{}, current_element_size:{}/{}.", + keyToStr(key), offset, FileSegment::stateToString(state), enable_cache_hits_threshold, stash_queue.getElementsNum(cache_lock), max_stash_element_size); + + auto record = records.find({key, offset}); + if (record == records.end()) + { + auto queue_iter = stash_queue.add(key, offset, 0, cache_lock); 
+ records.insert({{key, offset}, queue_iter}); + + if (stash_queue.getElementsNum(cache_lock) > max_stash_element_size) + { + auto remove_queue_iter = stash_queue.begin(); + records.erase({remove_queue_iter->key, remove_queue_iter->offset}); + stash_queue.remove(remove_queue_iter, cache_lock); + } + /// For segments that do not reach the download threshold, we do not download them, but directly read them + return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + } + else + { + auto queue_iter = record->second; + queue_iter->hits++; + stash_queue.moveToEnd(queue_iter, cache_lock); + + if (queue_iter->hits >= enable_cache_hits_threshold) + return std::make_shared(offset, size, key, this, FileSegment::State::EMPTY); + else + return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + } + } + else + return std::make_shared(offset, size, key, this, state); + }; + + FileSegmentCell cell(skip_or_download(), this, cache_lock); auto & offsets = files[key]; if (offsets.empty()) @@ -471,7 +510,7 @@ bool LRUFileCache::tryReserve( std::vector to_evict; std::vector trash; - for (const auto & [entry_key, entry_offset, entry_size] : queue) + for (const auto & [entry_key, entry_offset, entry_size, entry_hits] : queue) { if (!is_overflow()) break; @@ -619,7 +658,7 @@ void LRUFileCache::remove() std::vector to_remove; for (auto it = queue.begin(); it != queue.end();) { - const auto & [key, offset, size] = *it++; + const auto & [key, offset, size, hits] = *it++; auto * cell = getCell(key, offset, cache_lock); if (!cell) throw Exception( @@ -882,6 +921,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell( queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock); break; } + case FileSegment::State::SKIP_CACHE: case FileSegment::State::EMPTY: case FileSegment::State::DOWNLOADING: { @@ -934,7 +974,7 @@ bool LRUFileCache::LRUQueue::contains( { /// This method is used for 
assertions in debug mode. /// So we do not care about complexity here. - for (const auto & [entry_key, entry_offset, size] : queue) + for (const auto & [entry_key, entry_offset, size, hits] : queue) { if (key == entry_key && offset == entry_offset) return true; @@ -947,7 +987,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g [[maybe_unused]] size_t total_size = 0; for (auto it = queue.begin(); it != queue.end();) { - auto & [key, offset, size] = *it++; + auto & [key, offset, size, hits] = *it++; auto * cell = cache->getCell(key, offset, cache_lock); if (!cell) @@ -969,7 +1009,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g String LRUFileCache::LRUQueue::toString(std::lock_guard & /* cache_lock */) const { String result; - for (const auto & [key, offset, size] : queue) + for (const auto & [key, offset, size, hits] : queue) { if (!result.empty()) result += ", "; diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index ff65b579470..42feb9727b6 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -165,6 +165,7 @@ private: Key key; size_t offset; size_t size; + size_t hits = 0; FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {} }; @@ -223,8 +224,17 @@ private: using FileSegmentsByOffset = std::map; using CachedFiles = std::unordered_map; + using AccessKeyAndOffset = std::pair; + using AccessRecord = std::map; + CachedFiles files; LRUQueue queue; + + LRUQueue stash_queue; + AccessRecord records; + size_t max_stash_element_size; + size_t enable_cache_hits_threshold; + Poco::Logger * log; FileSegments getImpl( diff --git a/src/Common/FileCacheSettings.cpp b/src/Common/FileCacheSettings.cpp index f555de277b2..6982068e40f 100644 --- a/src/Common/FileCacheSettings.cpp +++ b/src/Common/FileCacheSettings.cpp @@ -11,6 +11,7 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & max_elements = 
config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); + enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD); } } diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h index 0b34e1e3d82..2f508c3ef46 100644 --- a/src/Common/FileCacheSettings.h +++ b/src/Common/FileCacheSettings.h @@ -14,6 +14,8 @@ struct FileCacheSettings size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; bool cache_on_write_operations = false; + size_t enable_cache_hits_threshold = REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD; + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Common/FileCache_fwd.h b/src/Common/FileCache_fwd.h index 7448f0c8c89..8a7c2eeb458 100644 --- a/src/Common/FileCache_fwd.h +++ b/src/Common/FileCache_fwd.h @@ -7,6 +7,7 @@ namespace DB static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE = 1024 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; +static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0; class IFileCache; using FileCachePtr = std::shared_ptr; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 356ba8bf55f..4b8ce9d4b77 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -59,6 +59,10 @@ FileSegment::FileSegment( downloader_id = getCallerId(); break; } + case (State::SKIP_CACHE): + { + break; + } default: { throw 
Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state"); @@ -525,6 +529,11 @@ void FileSegment::complete(std::lock_guard & cache_lock) void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std::lock_guard & segment_lock) { + bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); + + if (is_last_holder && download_state == State::SKIP_CACHE) + cache->remove(key(), offset(), cache_lock, segment_lock); + if (download_state == State::SKIP_CACHE || is_detached) return; From 1ce219bae24579457aa4081787200709f18ced18 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 17:24:38 +0800 Subject: [PATCH 514/615] fix --- src/Common/FileCache.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 4a7cea9f004..bbd1d491685 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -426,19 +426,19 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( records.erase({remove_queue_iter->key, remove_queue_iter->offset}); stash_queue.remove(remove_queue_iter, cache_lock); } + /// For segments that do not reach the download threshold, we do not download them, but directly read them - return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + state = queue_iter->hits >= enable_cache_hits_threshold ? FileSegment::State::EMPTY : FileSegment::State::SKIP_CACHE; + return std::make_shared(offset, size, key, this, state); } else { auto queue_iter = record->second; queue_iter->hits++; stash_queue.moveToEnd(queue_iter, cache_lock); - - if (queue_iter->hits >= enable_cache_hits_threshold) - return std::make_shared(offset, size, key, this, FileSegment::State::EMPTY); - else - return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); + + state = queue_iter->hits >= enable_cache_hits_threshold ? 
FileSegment::State::EMPTY : FileSegment::State::SKIP_CACHE; + return std::make_shared(offset, size, key, this, state); } } else From c372c3d6aab6493825519e429b33e2e95fbfbe37 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 25 May 2022 11:49:59 +0200 Subject: [PATCH 515/615] Fix performance tests --- src/Functions/GatherUtils/sliceHasImplAnyAll.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 9933e0d2c5f..21c80b742fd 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -898,11 +898,12 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAll(const FirstSliceType & first, const return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map); } } - else if (isArchSupported(TargetArch::SSE42)) + + if (isArchSupported(TargetArch::SSE42)) { if constexpr (std::is_same_v> || std::is_same_v>) { - return TargetSpecific::SSE42::sliceHasImplAnyAllImplInt8(first, second, first_null_map, second_null_map); + return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt8(first, second, first_null_map, second_null_map); } else if constexpr (std::is_same_v> || std::is_same_v>) { From d0fcffec66aa96f0972a0262282d053aa6ffa852 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 17:51:03 +0800 Subject: [PATCH 516/615] fix style --- src/Common/FileCache.cpp | 3 --- src/Common/FileCache.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index bbd1d491685..efbe869db06 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -410,9 +410,6 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( { if (state == FileSegment::State::EMPTY) { - LOG_TEST(log, "[addCell] FileSegment key:{}, offset:{}, state:{}, enable_cache_hits:{}, 
current_element_size:{}/{}.", - keyToStr(key), offset, FileSegment::stateToString(state), enable_cache_hits_threshold, stash_queue.getElementsNum(cache_lock), max_stash_element_size); - auto record = records.find({key, offset}); if (record == records.end()) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 42feb9727b6..b25a9d69249 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -234,7 +234,7 @@ private: AccessRecord records; size_t max_stash_element_size; size_t enable_cache_hits_threshold; - + Poco::Logger * log; FileSegments getImpl( From 90deef1c3c1869b2db7618f2ac4136df00d42bcb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 25 May 2022 12:18:47 +0200 Subject: [PATCH 517/615] Bump cctz to 2022-05-15 --- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index 9edd0861d83..8c71d74bdf7 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 9edd0861d8328b2ae77e8fb5f4d7dcd1cf33b42b +Subproject commit 8c71d74bdf76c3fa401da845089ae60a6c0aeefa From c743fef3ae3ea204e4692eb575822603fa86d2ca Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 25 May 2022 13:38:16 +0200 Subject: [PATCH 518/615] Update 3rd party contribution guide - replace obsolete references to clickhouse-extra to clickhouse - generally rewrite the guide and make it easier to understand --- docs/en/development/contrib.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 7cbe32fdd8b..0a254f8c8ae 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -94,14 +94,11 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li [Example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) -## 
Guidelines for adding new third-party libraries and maintaining custom changes in them {#adding-third-party-libraries} +## Adding new third-party libraries and maintaining patches in third-party libraries {#adding-third-party-libraries} -1. All external third-party code should reside in the dedicated directories under `contrib` directory of ClickHouse repo. Prefer Git submodules, when available. -2. Fork/mirror the official repo in [Clickhouse-extras](https://github.com/ClickHouse-Extras). Prefer official GitHub repos, when available. -3. Branch from the branch you want to integrate, e.g., `master` -> `clickhouse/master`, or `release/vX.Y.Z` -> `clickhouse/release/vX.Y.Z`. -4. All forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras) can be automatically synchronized with upstreams. `clickhouse/...` branches will remain unaffected, since virtually nobody is going to use that naming pattern in their upstream repos. -5. Add submodules under `contrib` of ClickHouse repo that refer the above forks/mirrors. Set the submodules to track the corresponding `clickhouse/...` branches. -6. Every time the custom changes have to be made in the library code, a dedicated branch should be created, like `clickhouse/my-fix`. Then this branch should be merged into the branch, that is tracked by the submodule, e.g., `clickhouse/master` or `clickhouse/release/vX.Y.Z`. -7. No code should be pushed in any branch of the forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras), whose names do not follow `clickhouse/...` pattern. -8. Always write the custom changes with the official repo in mind. Once the PR is merged from (a feature/fix branch in) your personal fork into the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras), and the submodule is bumped in ClickHouse repo, consider opening another PR from (a feature/fix branch in) the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras) to the official repo of the library. 
This will make sure, that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, 3) the change will not remain a maintenance burden solely on ClickHouse developers. -9. When a submodule needs to start using a newer code from the original branch (e.g., `master`), and since the custom changes might be merged in the branch it is tracking (e.g., `clickhouse/master`) and so it may diverge from its original counterpart (i.e., `master`), a careful merge should be carried out first, i.e., `master` -> `clickhouse/master`, and only then the submodule can be bumped in ClickHouse. +1. Each third-party library must reside in a dedicated directory under the `contrib/` directory of the ClickHouse repository. Avoid dumps/copies of external code, instead use Git's submodule feature to pull third-party code from an external upstream repository. +2. Submodules are listed in `.gitmodules`. If the external library can be used as-is, you may reference the upstream repository directly. Otherwise, i.e. the external library requires patching/customization, create a fork of the official repository in the [ClickHouse organization in GitHub](https://github.com/ClickHouse). +3. In the latter case, create a branch with `clickhouse/` prefix from the branch you want to integrate, e.g. `clickhouse/master` (for `master`) or `clickhouse/release/vX.Y.Z` (for a `release/vX.Y.Z` tag). The purpose of this branch is to isolate customization of the library from upstream work. For example, pulls from the upstream repository into the fork will leave all `clickhouse/` branches unaffected. Submodules in `contrib/` must only track `clickhouse/` branches of forked third-party repositories. +4. To patch a fork of a third-party library, create a dedicated branch with `clickhouse/` prefix in the fork, e.g. `clickhouse/fix-some-disaster`. Finally, merge the patch branch into the custom tracking branch (e.g. 
`clickhouse/master` or `clickhouse/release/vX.Y.Z`) using a PR. +5. Always create patches of third-party libraries with the official repository in mind. Once a PR of a patch branch to the `clickhouse/` branch in the fork repository is done and the submodule version in ClickHouse's official repository is bumped, consider opening another PR from the patch branch to the upstream library repository. This ensures that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, 3) the change will not remain a maintenance burden solely on ClickHouse developers. +6. To update a submodule with changes in the upstream repository, first merge upstream `master` (or a new `versionX.Y.Z` tag) into the `clickhouse`-tracking branch in the fork repository. Conflicts with patches/customization will need to be resolved in this merge (see Step 4). Once the merge is done, bump the submodule in ClickHouse to point to the new hash in the fork. From 2211c1ddb8c4b07f6bacdbd51fc45fa93a9db78e Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 20:15:43 +0800 Subject: [PATCH 519/615] fix --- src/Common/FileCache.h | 12 +++++++++++- src/Common/FileSegment.cpp | 3 +-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index b25a9d69249..a367d47885b 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -225,7 +226,16 @@ private: using CachedFiles = std::unordered_map; using AccessKeyAndOffset = std::pair; - using AccessRecord = std::map; + + struct KeyAndOffsetHash + { + std::size_t operator()(const AccessKeyAndOffset & key) const + { + return std::hash()(key.first) ^ std::hash()(key.second); + } + }; + + using AccessRecord = std::unordered_map; CachedFiles files; LRUQueue queue; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 
4b8ce9d4b77..aee3d470f44 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -551,8 +551,7 @@ void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the /// downloader or the only owner of the segment. - bool can_update_segment_state = isDownloaderImpl(segment_lock) - || cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); + bool can_update_segment_state = isDownloaderImpl(segment_lock) || is_last_holder; if (can_update_segment_state) download_state = State::PARTIALLY_DOWNLOADED; From 45da28ecaeb14d31ebf5cb5f191f22de744a308a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 25 May 2022 14:22:22 +0200 Subject: [PATCH 520/615] Improve performance of geo distance functions --- src/Functions/greatCircleDistance.cpp | 39 ++++++++++++++++++++----- tests/performance/great_circle_dist.xml | 15 ++++++++-- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 9b0d2625914..857cc045b31 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -14,9 +15,11 @@ namespace DB { + namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; } /** Calculates the distance between two geographical locations. 
@@ -262,24 +265,46 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - const IColumn & col_lon1 = *arguments[0].column; - const IColumn & col_lat1 = *arguments[1].column; - const IColumn & col_lon2 = *arguments[2].column; - const IColumn & col_lat2 = *arguments[3].column; + auto arguments_copy = arguments; + for (auto & argument : arguments_copy) { + argument.column = castColumn(argument, result_type); + argument.type = result_type; + } + + const auto * col_lon1 = convertArgumentColumnToFloat32(arguments_copy, 0); + const auto * col_lat1 = convertArgumentColumnToFloat32(arguments_copy, 1); + const auto * col_lon2 = convertArgumentColumnToFloat32(arguments_copy, 2); + const auto * col_lat2 = convertArgumentColumnToFloat32(arguments_copy, 3); for (size_t row_num = 0; row_num < input_rows_count; ++row_num) + { dst_data[row_num] = distance( - col_lon1.getFloat32(row_num), col_lat1.getFloat32(row_num), - col_lon2.getFloat32(row_num), col_lat2.getFloat32(row_num)); + col_lon1->getData()[row_num], col_lat1->getData()[row_num], + col_lon2->getData()[row_num], col_lat2->getData()[row_num]); + } return dst; } + + const ColumnFloat32 * convertArgumentColumnToFloat32(const ColumnsWithTypeAndName & arguments, size_t argument_index) const + { + const auto * column_typed = checkAndGetColumn(arguments[argument_index].column.get()); + if (!column_typed) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be Float32.", + arguments[argument_index].type->getName(), + argument_index + 1, + getName()); + + return column_typed; + } }; ) // DECLARE_MULTITARGET_CODE diff --git a/tests/performance/great_circle_dist.xml b/tests/performance/great_circle_dist.xml index ad445f34417..62666e2d934 100644 --- a/tests/performance/great_circle_dist.xml +++ b/tests/performance/great_circle_dist.xml @@ -1,7 +1,18 @@ + + + func + + greatCircleDistance + greatCircleAngle + geoDistance + + + + - SELECT count() FROM numbers(1000000) WHERE NOT ignore(greatCircleDistance((rand(1) % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand(2)) / 4294967296 - 180, (rand(3) % 180) * 1. - 90)) + SELECT count() FROM numbers(10000000) WHERE NOT ignore({func}((rand(1) % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand(2)) / 4294967296 - 180, (rand(3) % 180) * 1. - 90)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(greatCircleDistance(55. + toFloat64(rand(1)) / 4294967296, 37. + toFloat64(rand(2)) / 4294967296, 55. + toFloat64(rand(3)) / 4294967296, 37. + toFloat64(rand(4)) / 4294967296)) + SELECT count() FROM zeros(100000000) WHERE NOT ignore({func}(55. + toFloat64(rand(1)) / 4294967296, 37. + toFloat64(rand(2)) / 4294967296, 55. + toFloat64(rand(3)) / 4294967296, 37. 
+ toFloat64(rand(4)) / 4294967296)) From adbb8211766f586705483a6a2c6fca81e858e36b Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 21:05:15 +0800 Subject: [PATCH 521/615] fix --- src/Common/FileCache.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index efbe869db06..714fa8d737e 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -507,7 +507,7 @@ bool LRUFileCache::tryReserve( std::vector to_evict; std::vector trash; - for (const auto & [entry_key, entry_offset, entry_size, entry_hits] : queue) + for (const auto & [entry_key, entry_offset, entry_size, _] : queue) { if (!is_overflow()) break; @@ -655,7 +655,7 @@ void LRUFileCache::remove() std::vector to_remove; for (auto it = queue.begin(); it != queue.end();) { - const auto & [key, offset, size, hits] = *it++; + const auto & [key, offset, size, _] = *it++; auto * cell = getCell(key, offset, cache_lock); if (!cell) throw Exception( @@ -935,7 +935,7 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add( const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard & /* cache_lock */) { #ifndef NDEBUG - for (const auto & [entry_key, entry_offset, _] : queue) + for (const auto & [entry_key, entry_offset, _, _] : queue) { if (entry_key == key && entry_offset == offset) throw Exception( @@ -971,7 +971,7 @@ bool LRUFileCache::LRUQueue::contains( { /// This method is used for assertions in debug mode. /// So we do not care about complexity here. 
- for (const auto & [entry_key, entry_offset, size, hits] : queue) + for (const auto & [entry_key, entry_offset, size, _] : queue) { if (key == entry_key && offset == entry_offset) return true; @@ -984,7 +984,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g [[maybe_unused]] size_t total_size = 0; for (auto it = queue.begin(); it != queue.end();) { - auto & [key, offset, size, hits] = *it++; + auto & [key, offset, size, _] = *it++; auto * cell = cache->getCell(key, offset, cache_lock); if (!cell) @@ -1006,7 +1006,7 @@ void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_g String LRUFileCache::LRUQueue::toString(std::lock_guard & /* cache_lock */) const { String result; - for (const auto & [key, offset, size, hits] : queue) + for (const auto & [key, offset, size, _] : queue) { if (!result.empty()) result += ", "; From 875557abc23ddd4ad8da81d75e5bf1a1662c8faa Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 21:53:28 +0800 Subject: [PATCH 522/615] fix --- src/Common/FileCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 714fa8d737e..c28794ab23e 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -935,7 +935,7 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add( const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard & /* cache_lock */) { #ifndef NDEBUG - for (const auto & [entry_key, entry_offset, _, _] : queue) + for (const auto & [entry_key, entry_offset, _, __] : queue) { if (entry_key == key && entry_offset == offset) throw Exception( From e67b3537f741f5590e5fbe32789d78b4a83fa072 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 25 May 2022 15:54:52 +0200 Subject: [PATCH 523/615] Functions normalizeUTF8 unstable performance tests fix --- src/Functions/normalizeString.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/src/Functions/normalizeString.cpp b/src/Functions/normalizeString.cpp index 9b1d1292d2c..625247ca432 100644 --- a/src/Functions/normalizeString.cpp +++ b/src/Functions/normalizeString.cpp @@ -95,6 +95,8 @@ struct NormalizeUTF8Impl size_t size = offsets.size(); res_offsets.resize(size); + res_data.reserve(data.size() * 2); + ColumnString::Offset current_from_offset = 0; ColumnString::Offset current_to_offset = 0; From 6370c290491981238ca3e6eeafd4f4cb3e8a5184 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 25 May 2022 14:13:12 +0000 Subject: [PATCH 524/615] Use a separate mutex for query_factories_info in Context. --- src/Interpreters/Context.cpp | 2 +- src/Interpreters/Context.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0ef024f7f47..e0f7645b48e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1079,7 +1079,7 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query factories info"); - auto lock = getLock(); + std::lock_guard lock(query_factories_info_mutex); switch (factory_type) { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index dbddda39aad..cf808dff582 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -302,8 +302,9 @@ private: std::unordered_set table_functions; }; - /// Needs to be chandged while having const context in factories methods + /// Needs to be changed while having const context in factories methods mutable QueryFactoriesInfo query_factories_info; + mutable std::mutex query_factories_info_mutex; /// TODO: maybe replace with temporary tables? 
StoragePtr view_source; /// Temporary StorageValues used to generate alias columns for materialized views From 28355114c05289d9593830f740b907b4c188b511 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 25 May 2022 16:19:29 +0200 Subject: [PATCH 525/615] Fixed tests --- src/Functions/greatCircleDistance.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 857cc045b31..7935510ad41 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -273,6 +273,7 @@ private: auto arguments_copy = arguments; for (auto & argument : arguments_copy) { + argument.column = argument.column->convertToFullColumnIfConst(); argument.column = castColumn(argument, result_type); argument.type = result_type; } From fda6ddeffa608611947d9c0fab263bca9f3a2660 Mon Sep 17 00:00:00 2001 From: msaf1980 Date: Wed, 25 May 2022 19:23:05 +0500 Subject: [PATCH 526/615] cleanup StorageHDFS (unused variables) --- src/Storages/HDFS/StorageHDFS.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index c8ebbfcfaac..1f9e57ab2b7 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -132,8 +132,6 @@ private: Block block_for_format; std::vector requested_virtual_columns; UInt64 max_block_size; - bool need_path_column; - bool need_file_column; std::shared_ptr file_iterator; ColumnsDescription columns_description; From 4f09a0c431477d7397c45d567b6cf5fec6eda80d Mon Sep 17 00:00:00 2001 From: Igor Nikonov <954088+devcrafter@users.noreply.github.com> Date: Wed, 25 May 2022 16:27:17 +0200 Subject: [PATCH 527/615] Update architecture.md Updated broken links in Functions section --- docs/en/development/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index f6d7d6bc5e7..b393c327316 100644 --- 
a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -121,7 +121,7 @@ There are ordinary functions and aggregate functions. For aggregate functions, s Ordinary functions do not change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`’s of data to implement vectorized query execution. -There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), that exploit block processing and violate the independence of rows. +There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#blocksize-function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#rownumberinblock-function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate-runningaccumulate), that exploit block processing and violate the independence of rows. ClickHouse has strong typing, so there’s no implicit type conversion. If a function does not support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function. 
From 6a962549d520e8af5734a0485a0ba749e53509eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 25 May 2022 16:45:32 +0200 Subject: [PATCH 528/615] Revert "Add support for preprocessing ZooKeeper operations in `clickhouse-keeper`" --- src/Coordination/KeeperServer.cpp | 17 - src/Coordination/KeeperStateMachine.cpp | 24 +- src/Coordination/KeeperStateMachine.h | 8 +- src/Coordination/KeeperStorage.cpp | 1658 ++++++----------- src/Coordination/KeeperStorage.h | 248 +-- .../WriteBufferFromNuraftBuffer.h | 1 + src/Coordination/ZooKeeperDataReader.cpp | 1 - src/Coordination/tests/gtest_coordination.cpp | 130 -- 8 files changed, 644 insertions(+), 1443 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index d74ad173811..5f77e996744 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -316,22 +315,6 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items); - auto log_store = state_manager->load_log_store(); - auto next_log_idx = log_store->next_slot(); - if (next_log_idx > 0 && next_log_idx > state_machine->last_commit_index()) - { - auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, next_log_idx); - - auto idx = state_machine->last_commit_index() + 1; - for (const auto & entry : *log_entries) - { - if (entry && entry->get_val_type() == nuraft::log_val_type::app_log) - state_machine->preprocess(idx, entry->get_buf()); - - ++idx; - } - } - loadLatestConfig(); last_local_config = state_manager->parseServersConfiguration(config, true).cluster_config; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index fa3a5195226..be7110fa841 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ 
b/src/Coordination/KeeperStateMachine.cpp @@ -44,6 +44,7 @@ namespace else /// backward compatibility request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + return request_for_session; } } @@ -113,21 +114,6 @@ void KeeperStateMachine::init() storage = std::make_unique(coordination_settings->dead_session_check_period_ms.totalMilliseconds(), superdigest); } -nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) -{ - preprocess(log_idx, data); - return nullptr; -} - -void KeeperStateMachine::preprocess(const uint64_t log_idx, nuraft::buffer & data) -{ - auto request_for_session = parseRequest(data); - if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) - return; - std::lock_guard lock(storage_and_responses_lock); - storage->preprocessRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, log_idx); -} - nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data); @@ -196,12 +182,6 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr cluster_config = ClusterConfig::deserialize(*tmp); } -void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & /*data*/) -{ - std::lock_guard lock(storage_and_responses_lock); - storage->rollbackRequest(log_idx); -} - nuraft::ptr KeeperStateMachine::last_snapshot() { /// Just return the latest snapshot. 
@@ -363,7 +343,7 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi { /// Pure local request, just process it with storage std::lock_guard lock(storage_and_responses_lock); - auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, std::nullopt, true /*check_acl*/, true /*is_local*/); + auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, std::nullopt); for (const auto & response : responses) if (!responses_queue.push(response)) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push response with session id {} into responses queue", response.session_id); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index aed96a59c13..73578e6a2ba 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -27,16 +27,16 @@ public: /// Read state from the latest snapshot void init(); - void preprocess(uint64_t log_idx, nuraft::buffer & data); - - nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; + /// Currently not supported + nuraft::ptr pre_commit(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; } nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT /// Save new cluster config to our snapshot (copy of the config stored in StateManager) void commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) override; /// NOLINT - void rollback(uint64_t log_idx, nuraft::buffer & data) override; + /// Currently not supported + void rollback(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override {} uint64_t last_commit_index() override { return last_committed_idx; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 6c0699be95c..f58776cf843 100644 --- a/src/Coordination/KeeperStorage.cpp +++ 
b/src/Coordination/KeeperStorage.cpp @@ -1,21 +1,19 @@ -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include "Common/ZooKeeper/ZooKeeperConstants.h" -#include #include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { @@ -51,10 +49,37 @@ String getSHA1(const String & userdata) String generateDigest(const String & userdata) { std::vector user_password; - boost::split(user_password, userdata, [](char character) { return character == ':'; }); + boost::split(user_password, userdata, [](char c) { return c == ':'; }); return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } +bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) +{ + if (node_acls.empty()) + return true; + + for (const auto & session_auth : session_auths) + if (session_auth.scheme == "super") + return true; + + for (const auto & node_acl : node_acls) + { + if (node_acl.permissions & permission) + { + if (node_acl.scheme == "world" && node_acl.id == "anyone") + return true; + + for (const auto & session_auth : session_auths) + { + if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) + return true; + } + } + } + + return false; +} + bool fixupACL( const std::vector & request_acls, const std::vector & current_ids, @@ -97,12 +122,11 @@ bool fixupACL( return valid_found; } -KeeperStorage::ResponsesForSessions processWatchesImpl( - const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) +KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) { KeeperStorage::ResponsesForSessions result; - auto watch_it = watches.find(path); - if 
(watch_it != watches.end()) + auto it = watches.find(path); + if (it != watches.end()) { std::shared_ptr watch_response = std::make_shared(); watch_response->path = path; @@ -110,10 +134,10 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watch_response->zxid = -1; watch_response->type = event_type; watch_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : watch_it->second) + for (auto watcher_session : it->second) result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); - watches.erase(watch_it); + watches.erase(it); } auto parent_path = parentPath(path); @@ -132,11 +156,10 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( for (const auto & path_to_check : paths_to_check_for_list_watches) { - watch_it = list_watches.find(path_to_check); - if (watch_it != list_watches.end()) + it = list_watches.find(path_to_check); + if (it != list_watches.end()) { - std::shared_ptr watch_list_response - = std::make_shared(); + std::shared_ptr watch_list_response = std::make_shared(); watch_list_response->path = path_to_check; watch_list_response->xid = Coordination::WATCH_XID; watch_list_response->zxid = -1; @@ -146,15 +169,14 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watch_list_response->type = Coordination::Event::DELETED; watch_list_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : watch_it->second) + for (auto watcher_session : it->second) result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); - list_watches.erase(watch_it); + list_watches.erase(it); } } return result; } - } void KeeperStorage::Node::setData(String new_data) @@ -176,322 +198,24 @@ void KeeperStorage::Node::removeChild(StringRef child_path) } KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_) - : session_expiry_queue(tick_time_ms), superdigest(superdigest_) + : session_expiry_queue(tick_time_ms) + , superdigest(superdigest_) { 
container.insert("/", Node()); } -template -struct Overloaded : Ts... -{ - using Ts::operator()...; -}; - -// explicit deduction guide -// https://en.cppreference.com/w/cpp/language/class_template_argument_deduction -template -Overloaded(Ts...) -> Overloaded; - -std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) -{ - std::shared_ptr node{nullptr}; - - if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) - { - const auto & committed_node = maybe_node_it->value; - node = std::make_shared(); - node->stat = committed_node.stat; - node->seq_num = committed_node.seq_num; - node->setData(committed_node.getData()); - } - - applyDeltas( - path, - Overloaded{ - [&](const CreateNodeDelta & create_delta) - { - assert(!node); - node = std::make_shared(); - node->stat = create_delta.stat; - node->setData(create_delta.data); - }, - [&](const RemoveNodeDelta & /*remove_delta*/) - { - assert(node); - node = nullptr; - }, - [&](const UpdateNodeDelta & update_delta) - { - assert(node); - update_delta.update_fn(*node); - }, - [&](auto && /*delta*/) {}, - }); - - return node; -} - -bool KeeperStorage::UncommittedState::hasNode(StringRef path) const -{ - bool exists = storage.container.contains(std::string{path}); - applyDeltas( - path, - Overloaded{ - [&](const CreateNodeDelta & /*create_delta*/) - { - assert(!exists); - exists = true; - }, - [&](const RemoveNodeDelta & /*remove_delta*/) - { - assert(exists); - exists = false; - }, - [&](auto && /*delta*/) {}, - }); - - return exists; -} - -Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const -{ - std::optional acl_id; - if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) - acl_id.emplace(maybe_node_it->value.acl_id); - - const Coordination::ACLs * acls{nullptr}; - applyDeltas( - path, - Overloaded{ - [&](const CreateNodeDelta & create_delta) - { - assert(!acl_id); - acls = &create_delta.acls; - }, 
- [&](const RemoveNodeDelta & /*remove_delta*/) - { - assert(acl_id || acls); - acl_id.reset(); - acls = nullptr; - }, - [&](const SetACLDelta & set_acl_delta) - { - assert(acl_id || acls); - acls = &set_acl_delta.acls; - }, - [&](auto && /*delta*/) {}, - }); - - if (acls) - return *acls; - - return acl_id ? storage.acl_map.convertNumber(*acl_id) : Coordination::ACLs{}; -} - -namespace -{ - -[[noreturn]] void onStorageInconsistency() -{ - LOG_ERROR(&Poco::Logger::get("KeeperStorage"), "Inconsistency found between uncommitted and committed data. Keeper will terminate to avoid undefined behaviour."); - std::terminate(); -} - -} - -Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_id) -{ - // Deltas are added with increasing ZXIDs - // If there are no deltas for the commit_zxid (e.g. read requests), we instantly return - // on first delta - for (auto & delta : uncommitted_state.deltas) - { - if (delta.zxid > commit_zxid) - break; - - bool finish_subdelta = false; - auto result = std::visit( - [&, &path = delta.path](DeltaType & operation) -> Coordination::Error - { - if constexpr (std::same_as) - { - if (!createNode( - path, - std::move(operation.data), - operation.stat, - operation.is_sequental, - operation.is_ephemeral, - std::move(operation.acls), - session_id)) - onStorageInconsistency(); - - return Coordination::Error::ZOK; - } - else if constexpr (std::same_as) - { - auto node_it = container.find(path); - if (node_it == container.end()) - onStorageInconsistency(); - - if (operation.version != -1 && operation.version != node_it->value.stat.version) - onStorageInconsistency(); - - container.updateValue(path, operation.update_fn); - return Coordination::Error::ZOK; - } - else if constexpr (std::same_as) - { - if (!removeNode(path, operation.version)) - onStorageInconsistency(); - - return Coordination::Error::ZOK; - } - else if constexpr (std::same_as) - { - auto node_it = container.find(path); - if (node_it == container.end()) - 
onStorageInconsistency(); - - if (operation.version != -1 && operation.version != node_it->value.stat.aversion) - onStorageInconsistency(); - - acl_map.removeUsage(node_it->value.acl_id); - - uint64_t acl_id = acl_map.convertACLs(operation.acls); - acl_map.addUsage(acl_id); - - container.updateValue(path, [acl_id](KeeperStorage::Node & node) { node.acl_id = acl_id; }); - - return Coordination::Error::ZOK; - } - else if constexpr (std::same_as) - return operation.error; - else if constexpr (std::same_as) - { - finish_subdelta = true; - return Coordination::Error::ZOK; - } - else if constexpr (std::same_as) - { - session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); - return Coordination::Error::ZOK; - } - else - { - // shouldn't be called in any process functions - onStorageInconsistency(); - } - }, - delta.operation); - - if (result != Coordination::Error::ZOK) - return result; - - if (finish_subdelta) - return Coordination::Error::ZOK; - } - - return Coordination::Error::ZOK; -} - -bool KeeperStorage::createNode( - const std::string & path, - String data, - const Coordination::Stat & stat, - bool is_sequental, - bool is_ephemeral, - Coordination::ACLs node_acls, - int64_t session_id) -{ - auto parent_path = parentPath(path); - auto node_it = container.find(parent_path); - - if (node_it == container.end()) - return false; - - if (node_it->value.stat.ephemeralOwner != 0) - return false; - - if (container.contains(path)) - return false; - - KeeperStorage::Node created_node; - - uint64_t acl_id = acl_map.convertACLs(node_acls); - acl_map.addUsage(acl_id); - - created_node.acl_id = acl_id; - created_node.stat = stat; - created_node.setData(std::move(data)); - created_node.is_sequental = is_sequental; - auto [map_key, _] = container.insert(path, created_node); - /// Take child path from key owned by map. 
- auto child_path = getBaseName(map_key->getKey()); - container.updateValue(parent_path, [child_path](KeeperStorage::Node & parent) { parent.addChild(child_path); }); - - if (is_ephemeral) - ephemerals[session_id].emplace(path); - - return true; -}; - -bool KeeperStorage::removeNode(const std::string & path, int32_t version) -{ - auto node_it = container.find(path); - if (node_it == container.end()) - return false; - - if (version != -1 && version != node_it->value.stat.version) - return false; - - if (node_it->value.stat.numChildren) - return false; - - auto prev_node = node_it->value; - if (prev_node.stat.ephemeralOwner != 0) - { - auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); - ephemerals_it->second.erase(path); - if (ephemerals_it->second.empty()) - ephemerals.erase(ephemerals_it); - } - - acl_map.removeUsage(prev_node.acl_id); - - container.updateValue( - parentPath(path), - [child_basename = getBaseName(node_it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); }); - - container.erase(path); - return true; -} - +using Undo = std::function; struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; - explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) : zk_request(zk_request_) { } - virtual Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const = 0; - virtual std::vector - preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const - { - return {}; - } - - // process the request using locally committed data - virtual Coordination::ZooKeeperResponsePtr - processLocal(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const - { - throw Exception{DB::ErrorCodes::LOGICAL_ERROR, "Cannot process the request locally"}; - } - - virtual KeeperStorage::ResponsesForSessions - 
processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const - { - return {}; - } - virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } + explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) + : zk_request(zk_request_) + {} + virtual std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const = 0; + virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; } + virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; } virtual ~KeeperStorageRequestProcessor() = default; }; @@ -499,328 +223,331 @@ struct KeeperStorageRequestProcessor struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override + std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { - return zk_request->makeResponse(); + return {zk_request->makeResponse(), {}}; } }; struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - Coordination::ZooKeeperResponsePtr - process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override + std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { auto response = zk_request->makeResponse(); dynamic_cast(*response).path = dynamic_cast(*zk_request).path; - return response; + return {response, 
{}}; } }; -namespace -{ - - Coordination::ACLs getNodeACLs(KeeperStorage & storage, StringRef path, bool is_local) - { - if (is_local) - { - auto node_it = storage.container.find(path); - if (node_it == storage.container.end()) - return {}; - - return storage.acl_map.convertNumber(node_it->value.acl_id); - } - - return storage.uncommitted_state.getACLs(path); - } - -} -bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) -{ - const auto node_acls = getNodeACLs(*this, path, is_local); - if (node_acls.empty()) - return true; - - if (uncommitted_state.hasACL(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) - return true; - - - for (const auto & node_acl : node_acls) - { - if (node_acl.permissions & permission) - { - if (node_acl.scheme == "world" && node_acl.id == "anyone") - return true; - - if (uncommitted_state.hasACL( - session_id, - is_local, - [&](const auto & auth_id) { return auth_id.scheme == node_acl.scheme && auth_id.id == node_acl.id; })) - return true; - } - } - - return false; -} - - struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); } - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { + auto & container = storage.container; auto path = zk_request->getPath(); - return storage.checkACL(parentPath(path), Coordination::ACL::Create, session_id, is_local); + auto 
parent_path = parentPath(path); + + auto it = container.find(parent_path); + if (it == container.end()) + return true; + + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Create, node_acls, session_auths); } - std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { + auto & container = storage.container; + auto & ephemerals = storage.ephemerals; + + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Undo undo; + Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); - std::vector new_deltas; - auto parent_path = parentPath(request.path); - auto parent_node = storage.uncommitted_state.getNode(parent_path); - if (parent_node == nullptr) - return {{zxid, Coordination::Error::ZNONODE}}; - - else if (parent_node->stat.ephemeralOwner != 0) - return {{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; + auto it = container.find(parent_path); + if (it == container.end()) + { + response.error = Coordination::Error::ZNONODE; + return { response_ptr, undo }; + } + else if (it->value.stat.ephemeralOwner != 0) + { + response.error = Coordination::Error::ZNOCHILDRENFOREPHEMERALS; + return { response_ptr, undo }; + } std::string path_created = request.path; if (request.is_sequential) { - auto seq_num = parent_node->seq_num; + auto seq_num = it->value.seq_num; - std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM seq_num_str.exceptions(std::ios::failbit); seq_num_str << std::setw(10) << std::setfill('0') << seq_num; path_created 
+= seq_num_str.str(); } - - if (storage.uncommitted_state.hasNode(path_created)) - return {{zxid, Coordination::Error::ZNODEEXISTS}}; - - if (getBaseName(path_created).size == 0) - return {{zxid, Coordination::Error::ZBADARGUMENTS}}; - - Coordination::ACLs node_acls; - if (!fixupACL(request.acls, storage.session_and_auth[session_id], node_acls)) - return {{zxid, Coordination::Error::ZINVALIDACL}}; - - Coordination::Stat stat; - stat.czxid = zxid; - stat.mzxid = zxid; - stat.pzxid = zxid; - stat.ctime = time; - stat.mtime = time; - stat.numChildren = 0; - stat.version = 0; - stat.aversion = 0; - stat.cversion = 0; - stat.dataLength = request.data.length(); - stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; - - new_deltas.emplace_back( - std::move(path_created), - zxid, - KeeperStorage::CreateNodeDelta{stat, request.is_ephemeral, request.is_sequential, std::move(node_acls), request.data}); - - int32_t parent_cversion = request.parent_cversion; - - new_deltas.emplace_back( - std::string{parent_path}, - zxid, - KeeperStorage::UpdateNodeDelta{[parent_cversion, zxid](KeeperStorage::Node & node) - { - ++node.seq_num; - if (parent_cversion == -1) - ++node.stat.cversion; - else if (parent_cversion > node.stat.cversion) - node.stat.cversion = parent_cversion; - - if (zxid > node.stat.pzxid) - node.stat.pzxid = zxid; - ++node.stat.numChildren; - }}); - return new_deltas; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override - { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + if (container.contains(path_created)) { - response.error = result; - return response_ptr; + response.error = Coordination::Error::ZNODEEXISTS; + return { response_ptr, undo }; + } + if 
(getBaseName(path_created).size == 0) + { + response.error = Coordination::Error::ZBADARGUMENTS; + return { response_ptr, undo }; } - const auto & deltas = storage.uncommitted_state.deltas; - auto create_delta_it = std::find_if( - deltas.begin(), - deltas.end(), - [zxid](const auto & delta) - { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); + auto & session_auth_ids = storage.session_and_auth[session_id]; - assert(create_delta_it != deltas.end()); + KeeperStorage::Node created_node; + + Coordination::ACLs node_acls; + if (!fixupACL(request.acls, session_auth_ids, node_acls)) + { + response.error = Coordination::Error::ZINVALIDACL; + return {response_ptr, {}}; + } + + uint64_t acl_id = storage.acl_map.convertACLs(node_acls); + storage.acl_map.addUsage(acl_id); + + created_node.acl_id = acl_id; + created_node.stat.czxid = zxid; + created_node.stat.mzxid = zxid; + created_node.stat.pzxid = zxid; + created_node.stat.ctime = time; + created_node.stat.mtime = time; + created_node.stat.numChildren = 0; + created_node.stat.dataLength = request.data.length(); + created_node.stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; + created_node.is_sequental = request.is_sequential; + created_node.setData(std::move(request.data)); + + auto [map_key, _] = container.insert(path_created, created_node); + /// Take child path from key owned by map. 
+ auto child_path = getBaseName(map_key->getKey()); + + int32_t parent_cversion = request.parent_cversion; + int64_t prev_parent_zxid; + int32_t prev_parent_cversion; + container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid, + parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent) + { + parent.addChild(child_path); + prev_parent_cversion = parent.stat.cversion; + prev_parent_zxid = parent.stat.pzxid; + + /// Increment sequential number even if node is not sequential + ++parent.seq_num; + + if (parent_cversion == -1) + ++parent.stat.cversion; + else if (parent_cversion > parent.stat.cversion) + parent.stat.cversion = parent_cversion; + + if (zxid > parent.stat.pzxid) + parent.stat.pzxid = zxid; + ++parent.stat.numChildren; + }); + + response.path_created = path_created; + + if (request.is_ephemeral) + ephemerals[session_id].emplace(path_created); + + undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id] + { + storage.acl_map.removeUsage(acl_id); + + if (is_ephemeral) + storage.ephemerals[session_id].erase(path_created); + + storage.container.updateValue(parent_path, [child_path, prev_parent_zxid, prev_parent_cversion] (KeeperStorage::Node & undo_parent) + { + --undo_parent.stat.numChildren; + --undo_parent.seq_num; + undo_parent.stat.cversion = prev_parent_cversion; + undo_parent.stat.pzxid = prev_parent_zxid; + undo_parent.removeChild(child_path); + }); + + storage.container.erase(path_created); + }; - response.path_created = create_delta_it->path; response.error = Coordination::Error::ZOK; - return response_ptr; + return { response_ptr, undo }; } }; struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return 
storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + auto & container = storage.container; + auto it = container.find(zk_request->getPath()); + if (it == container.end()) + return true; + + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Read, node_acls, session_auths); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override - { - Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - - if (!storage.uncommitted_state.hasNode(request.path)) - return {{zxid, Coordination::Error::ZNONODE}}; - - return {}; - } - - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const + std::pair process(KeeperStorage & storage, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { + auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - if constexpr (!local) + auto it = container.find(request.path); + if (it == container.end()) { - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) - { - response.error = result; - return response_ptr; - } - } - - auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it == container.end()) - { - if constexpr (local) - response.error = Coordination::Error::ZNONODE; - else - onStorageInconsistency(); + response.error = Coordination::Error::ZNONODE; } else { - response.stat = 
node_it->value.stat; - response.data = node_it->value.getData(); + response.stat = it->value.stat; + response.data = it->value.getData(); response.error = Coordination::Error::ZOK; } - return response_ptr; - } - - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); - } - - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); + return { response_ptr, {} }; } }; +namespace +{ + /// Garbage required to apply log to "fuzzy" zookeeper snapshot + void updateParentPzxid(const std::string & child_path, int64_t zxid, KeeperStorage::Container & container) + { + auto parent_path = parentPath(child_path); + auto parent_it = container.find(parent_path); + if (parent_it != container.end()) + { + container.updateValue(parent_path, [zxid](KeeperStorage::Node & parent) + { + if (parent.stat.pzxid < zxid) + parent.stat.pzxid = zxid; + }); + } + } +} + struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return storage.checkACL(parentPath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); + auto & container = storage.container; + auto it = container.find(parentPath(zk_request->getPath())); + if (it == container.end()) + return true; + + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Delete, node_acls, session_auths); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - 
preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /* time */) const override { - Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); + auto & container = storage.container; + auto & ephemerals = storage.ephemerals; - std::vector new_deltas; - - const auto update_parent_pzxid = [&]() - { - auto parent_path = parentPath(request.path); - if (!storage.uncommitted_state.hasNode(parent_path)) - return; - - new_deltas.emplace_back( - std::string{parent_path}, - zxid, - KeeperStorage::UpdateNodeDelta{[zxid](KeeperStorage::Node & parent) - { - if (parent.stat.pzxid < zxid) - parent.stat.pzxid = zxid; - }}); - }; - - auto node = storage.uncommitted_state.getNode(request.path); - - if (!node) - { - if (request.restored_from_zookeeper_log) - update_parent_pzxid(); - return {{zxid, Coordination::Error::ZNONODE}}; - } - else if (request.version != -1 && request.version != node->stat.version) - return {{zxid, Coordination::Error::ZBADVERSION}}; - else if (node->stat.numChildren) - return {{zxid, Coordination::Error::ZNOTEMPTY}}; - - if (request.restored_from_zookeeper_log) - update_parent_pzxid(); - - new_deltas.emplace_back( - std::string{parentPath(request.path)}, - zxid, - KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - }}); - - new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version}); - - return new_deltas; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override - { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); + Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); + Undo 
undo; - response.error = storage.commit(zxid, session_id); - return response_ptr; + auto it = container.find(request.path); + if (it == container.end()) + { + if (request.restored_from_zookeeper_log) + updateParentPzxid(request.path, zxid, container); + response.error = Coordination::Error::ZNONODE; + } + else if (request.version != -1 && request.version != it->value.stat.version) + { + response.error = Coordination::Error::ZBADVERSION; + } + else if (it->value.stat.numChildren) + { + response.error = Coordination::Error::ZNOTEMPTY; + } + else + { + if (request.restored_from_zookeeper_log) + updateParentPzxid(request.path, zxid, container); + + auto prev_node = it->value; + if (prev_node.stat.ephemeralOwner != 0) + { + auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); + ephemerals_it->second.erase(request.path); + if (ephemerals_it->second.empty()) + ephemerals.erase(ephemerals_it); + } + + storage.acl_map.removeUsage(prev_node.acl_id); + + container.updateValue(parentPath(request.path), [child_basename = getBaseName(it->key)] (KeeperStorage::Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + parent.removeChild(child_basename); + }); + + response.error = Coordination::Error::ZOK; + /// Erase full path from container after child removed from parent + container.erase(request.path); + + undo = [prev_node, &storage, path = request.path] + { + if (prev_node.stat.ephemeralOwner != 0) + storage.ephemerals[prev_node.stat.ephemeralOwner].emplace(path); + + storage.acl_map.addUsage(prev_node.acl_id); + + /// Dangerous place: we are adding StringRef to child into children unordered_hash set. + /// That's why we are taking getBaseName from inserted key, not from the path from request object. 
+ auto [map_key, _] = storage.container.insert(path, prev_node); + storage.container.updateValue(parentPath(path), [child_name = getBaseName(map_key->getKey())] (KeeperStorage::Node & parent) + { + ++parent.stat.numChildren; + --parent.stat.cversion; + parent.addChild(child_name); + }); + }; + } + + return { response_ptr, undo }; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); } @@ -829,140 +556,101 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */, int64_t /* time */) const override { - Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + auto & container = storage.container; - if (!storage.uncommitted_state.hasNode(request.path)) - return {{zxid, Coordination::Error::ZNONODE}}; - - return {}; - } - - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const - { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperExistsResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); - if constexpr (!local) + auto it = container.find(request.path); + if (it != container.end()) { - if (const auto result = 
storage.commit(zxid, session_id); result != Coordination::Error::ZOK) - { - response.error = result; - return response_ptr; - } - } - - auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it == container.end()) - { - if constexpr (local) - response.error = Coordination::Error::ZNONODE; - else - onStorageInconsistency(); + response.stat = it->value.stat; + response.error = Coordination::Error::ZOK; } else { - response.stat = node_it->value.stat; - response.error = Coordination::Error::ZOK; + response.error = Coordination::Error::ZNONODE; } - return response_ptr; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); - } - - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); + return { response_ptr, {} }; } }; struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Write, session_id, is_local); + auto & container = storage.container; + auto it = container.find(zk_request->getPath()); + if (it == container.end()) + return true; + + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Write, node_acls, session_auths); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time) const override - { - 
Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); - - std::vector new_deltas; - - if (!storage.uncommitted_state.hasNode(request.path)) - return {{zxid, Coordination::Error::ZNONODE}}; - - auto node = storage.uncommitted_state.getNode(request.path); - - if (request.version != -1 && request.version != node->stat.version) - return {{zxid, Coordination::Error::ZBADVERSION}}; - - new_deltas.emplace_back( - request.path, - zxid, - KeeperStorage::UpdateNodeDelta{ - [zxid, data = request.data, time](KeeperStorage::Node & value) - { - value.stat.version++; - value.stat.mzxid = zxid; - value.stat.mtime = time; - value.stat.dataLength = data.length(); - value.setData(data); - }, - request.version}); - - new_deltas.emplace_back( - parentPath(request.path).toString(), - zxid, - KeeperStorage::UpdateNodeDelta - { - [](KeeperStorage::Node & parent) - { - parent.stat.cversion++; - } - } - ); - - return new_deltas; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override + std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /* session_id */, int64_t time) const override { auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperSetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + Undo undo; - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + auto it = container.find(request.path); + if (it == container.end()) { - response.error = result; - return response_ptr; + response.error = Coordination::Error::ZNONODE; + } + else if (request.version == -1 || request.version == it->value.stat.version) + { + + auto prev_node = it->value; + + auto itr = container.updateValue(request.path, [zxid, request, time] (KeeperStorage::Node & value) mutable + { + value.stat.version++; 
+ value.stat.mzxid = zxid; + value.stat.mtime = time; + value.stat.dataLength = request.data.length(); + value.setData(std::move(request.data)); + }); + + container.updateValue(parentPath(request.path), [] (KeeperStorage::Node & parent) + { + parent.stat.cversion++; + }); + + response.stat = itr->value.stat; + response.error = Coordination::Error::ZOK; + + undo = [prev_node, &container, path = request.path] + { + container.updateValue(path, [&prev_node] (KeeperStorage::Node & value) { value = prev_node; }); + container.updateValue(parentPath(path), [] (KeeperStorage::Node & parent) + { + parent.stat.cversion--; + }); + }; + } + else + { + response.error = Coordination::Error::ZBADVERSION; } - auto node_it = container.find(request.path); - if (node_it == container.end()) - onStorageInconsistency(); - - response.stat = node_it->value.stat; - response.error = Coordination::Error::ZOK; - - return response_ptr; + return { response_ptr, undo }; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } @@ -970,48 +658,33 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + auto & container = storage.container; + auto it = container.find(zk_request->getPath()); + if (it == container.end()) + return true; + + const auto & node_acls = 
storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Read, node_acls, session_auths); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override - { - Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); - - if (!storage.uncommitted_state.hasNode(request.path)) - return {{zxid, Coordination::Error::ZNONODE}}; - - return {}; - } - - - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const + std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { + auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperListResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); - if constexpr (!local) + auto it = container.find(request.path); + if (it == container.end()) { - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) - { - response.error = result; - return response_ptr; - } - } - - auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it == container.end()) - { - if constexpr (local) - response.error = Coordination::Error::ZNONODE; - else - onStorageInconsistency(); + response.error = Coordination::Error::ZNONODE; } else { @@ -1019,247 +692,174 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (path_prefix.empty()) throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR); - const auto & children = node_it->value.getChildren(); + 
const auto & children = it->value.getChildren(); response.names.reserve(children.size()); for (const auto child : children) response.names.push_back(child.toString()); - response.stat = node_it->value.stat; + response.stat = it->value.stat; response.error = Coordination::Error::ZOK; } - return response_ptr; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); - } - - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); + return { response_ptr, {} }; } }; struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + auto & container = storage.container; + auto it = container.find(zk_request->getPath()); + if (it == container.end()) + return true; + + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Read, node_acls, session_auths); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { - Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + auto & container = storage.container; - if (!storage.uncommitted_state.hasNode(request.path)) - return 
{{zxid, Coordination::Error::ZNONODE}}; - - auto node = storage.uncommitted_state.getNode(request.path); - if (request.version != -1 && request.version != node->stat.version) - return {{zxid, Coordination::Error::ZBADVERSION}}; - - return {}; - } - - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const - { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperCheckResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); - - if constexpr (!local) + auto it = container.find(request.path); + if (it == container.end()) { - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) - { - response.error = result; - return response_ptr; - } + response.error = Coordination::Error::ZNONODE; } - - const auto on_error = [&]([[maybe_unused]] const auto error_code) + else if (request.version != -1 && request.version != it->value.stat.version) { - if constexpr (local) - response.error = error_code; - else - onStorageInconsistency(); - }; - - auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it == container.end()) - { - on_error(Coordination::Error::ZNONODE); - } - else if (request.version != -1 && request.version != node_it->value.stat.version) - { - on_error(Coordination::Error::ZBADVERSION); + response.error = Coordination::Error::ZBADVERSION; } else { response.error = Coordination::Error::ZOK; } - return response_ptr; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); - } - - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, 
session_id, time); + return { response_ptr, {} }; } }; struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); + auto & container = storage.container; + auto it = container.find(zk_request->getPath()); + if (it == container.end()) + return true; + + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + return checkACL(Coordination::ACL::Admin, node_acls, session_auths); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override + std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override { - Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + auto & container = storage.container; - auto & uncommitted_state = storage.uncommitted_state; - if (!uncommitted_state.hasNode(request.path)) - return {{zxid, Coordination::Error::ZNONODE}}; - - auto node = uncommitted_state.getNode(request.path); - - if (request.version != -1 && request.version != node->stat.aversion) - return {{zxid, Coordination::Error::ZBADVERSION}}; - - - auto & session_auth_ids = storage.session_and_auth[session_id]; - Coordination::ACLs node_acls; - - if (!fixupACL(request.acls, session_auth_ids, node_acls)) - return {{zxid, Coordination::Error::ZINVALIDACL}}; - - return - { - { - request.path, - zxid, - KeeperStorage::SetACLDelta{std::move(node_acls), request.version} - }, - { - request.path, - zxid, - KeeperStorage::UpdateNodeDelta - { - [](KeeperStorage::Node & n) { 
++n.stat.aversion; } - } - } - }; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override - { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperSetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); - - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + auto it = container.find(request.path); + if (it == container.end()) { - response.error = result; - return response_ptr; + response.error = Coordination::Error::ZNONODE; + } + else if (request.version != -1 && request.version != it->value.stat.aversion) + { + response.error = Coordination::Error::ZBADVERSION; + } + else + { + auto & session_auth_ids = storage.session_and_auth[session_id]; + Coordination::ACLs node_acls; + + if (!fixupACL(request.acls, session_auth_ids, node_acls)) + { + response.error = Coordination::Error::ZINVALIDACL; + return {response_ptr, {}}; + } + + uint64_t acl_id = storage.acl_map.convertACLs(node_acls); + storage.acl_map.addUsage(acl_id); + + storage.container.updateValue(request.path, [acl_id] (KeeperStorage::Node & node) + { + node.acl_id = acl_id; + ++node.stat.aversion; + }); + + response.stat = it->value.stat; + response.error = Coordination::Error::ZOK; } - auto node_it = storage.container.find(request.path); - if (node_it == storage.container.end()) - onStorageInconsistency(); - response.stat = node_it->value.stat; - response.error = Coordination::Error::ZOK; - - return response_ptr; + /// It cannot be used insied multitransaction? 
+ return { response_ptr, {} }; } }; struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); - } + auto & container = storage.container; + auto it = container.find(zk_request->getPath()); + if (it == container.end()) + return true; + const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + if (node_acls.empty()) + return true; + + const auto & session_auths = storage.session_and_auth[session_id]; + /// LOL, GetACL require more permissions, then SetACL... + return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths); + } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override - { - Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); - - if (!storage.uncommitted_state.hasNode(request.path)) - return {{zxid, Coordination::Error::ZNONODE}}; - - return {}; - } - - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const + std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); - - if constexpr (!local) - { - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) - { - response.error = result; - return response_ptr; - } - } 
- auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it == container.end()) + auto it = container.find(request.path); + if (it == container.end()) { - if constexpr (local) - response.error = Coordination::Error::ZNONODE; - else - onStorageInconsistency(); + response.error = Coordination::Error::ZNONODE; } else { - response.stat = node_it->value.stat; - response.acl = storage.acl_map.convertNumber(node_it->value.acl_id); + response.stat = it->value.stat; + response.acl = storage.acl_map.convertNumber(it->value.acl_id); } - return response_ptr; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); - } - - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - return processImpl(storage, zxid, session_id, time); + return {response_ptr, {}}; } }; struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { for (const auto & concrete_request : concrete_requests) - if (!concrete_request->checkAuth(storage, session_id, is_local)) + if (!concrete_request->checkAuth(storage, session_id)) return false; return true; } @@ -1289,124 +889,65 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro concrete_requests.push_back(std::make_shared(sub_zk_request)); break; default: - throw DB::Exception( - ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); } } } - std::vector preprocess(KeeperStorage 
& storage, int64_t zxid, int64_t session_id, int64_t time) const override + std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { - // manually add deltas so that the result of previous request in the transaction is used in the next request - auto & saved_deltas = storage.uncommitted_state.deltas; + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); + std::vector undo_actions; - std::vector response_errors; - response_errors.reserve(concrete_requests.size()); - for (size_t i = 0; i < concrete_requests.size(); ++i) + try { - auto new_deltas = concrete_requests[i]->preprocess(storage, zxid, session_id, time); - - if (!new_deltas.empty()) + size_t i = 0; + for (const auto & concrete_request : concrete_requests) { - if (auto * error = std::get_if(&new_deltas.back().operation)) + auto [ cur_response, undo_action ] = concrete_request->process(storage, zxid, session_id, time); + + response.responses[i] = cur_response; + if (cur_response->error != Coordination::Error::ZOK) { - std::erase_if(saved_deltas, [zxid](const auto & delta) { return delta.zxid == zxid; }); - - response_errors.push_back(error->error); - - for (size_t j = i + 1; j < concrete_requests.size(); ++j) + for (size_t j = 0; j <= i; ++j) { - response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); + auto response_error = response.responses[j]->error; + response.responses[j] = std::make_shared(); + response.responses[j]->error = response_error; } - return {{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + for (size_t j = i + 1; j < response.responses.size(); ++j) + { + response.responses[j] = std::make_shared(); + response.responses[j]->error = Coordination::Error::ZRUNTIMEINCONSISTENCY; + } + + for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it) + if (*it) + (*it)(); + + return { response_ptr, {} 
}; } + else + undo_actions.emplace_back(std::move(undo_action)); + + ++i; } - new_deltas.emplace_back(zxid, KeeperStorage::SubDeltaEnd{}); - response_errors.push_back(Coordination::Error::ZOK); - saved_deltas.insert(saved_deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); + response.error = Coordination::Error::ZOK; + return { response_ptr, {} }; + } + catch (...) + { + for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it) + if (*it) + (*it)(); + throw; } - - return {}; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); - - auto & deltas = storage.uncommitted_state.deltas; - // the deltas will have at least SubDeltaEnd or FailedMultiDelta - assert(!deltas.empty()); - if (auto * failed_multi = std::get_if(&deltas.front().operation)) - { - for (size_t i = 0; i < concrete_requests.size(); ++i) - { - response.responses[i] = std::make_shared(); - response.responses[i]->error = failed_multi->error_codes[i]; - } - - return response_ptr; - } - - for (size_t i = 0; i < concrete_requests.size(); ++i) - { - auto cur_response = concrete_requests[i]->process(storage, zxid, session_id, time); - - while (!deltas.empty()) - { - if (std::holds_alternative(deltas.front().operation)) - { - deltas.pop_front(); - break; - } - - deltas.pop_front(); - } - - response.responses[i] = cur_response; - } - - response.error = Coordination::Error::ZOK; - return response_ptr; - } - - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); - - 
for (size_t i = 0; i < concrete_requests.size(); ++i) - { - auto cur_response = concrete_requests[i]->process(storage, zxid, session_id, time); - - response.responses[i] = cur_response; - if (cur_response->error != Coordination::Error::ZOK) - { - for (size_t j = 0; j <= i; ++j) - { - auto response_error = response.responses[j]->error; - response.responses[j] = std::make_shared(); - response.responses[j]->error = response_error; - } - - for (size_t j = i + 1; j < response.responses.size(); ++j) - { - response.responses[j] = std::make_shared(); - response.responses[j]->error = Coordination::Error::ZRUNTIMEINCONSISTENCY; - } - - return response_ptr; - } - } - - response.error = Coordination::Error::ZOK; - return response_ptr; - } - - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { KeeperStorage::ResponsesForSessions result; for (const auto & generic_request : concrete_requests) @@ -1421,7 +962,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - Coordination::ZooKeeperResponsePtr process(KeeperStorage &, int64_t, int64_t, int64_t /* time */) const override + std::pair process(KeeperStorage &, int64_t, int64_t, int64_t /* time */) const override { throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR); } @@ -1430,40 +971,36 @@ struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector preprocess(KeeperStorage & storage, int64_t zxid, 
int64_t session_id, int64_t /*time*/) const override + std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override { Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); + auto & sessions_and_auth = storage.session_and_auth; if (auth_request.scheme != "digest" || std::count(auth_request.data.begin(), auth_request.data.end(), ':') != 1) - return {{zxid, Coordination::Error::ZAUTHFAILED}}; - - std::vector new_deltas; - auto digest = generateDigest(auth_request.data); - if (digest == storage.superdigest) { - KeeperStorage::AuthID auth{"super", ""}; - new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(auth)}); + auth_response.error = Coordination::Error::ZAUTHFAILED; } else { - KeeperStorage::AuthID new_auth{auth_request.scheme, digest}; - if (!storage.uncommitted_state.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) - new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); + auto digest = generateDigest(auth_request.data); + if (digest == storage.superdigest) + { + KeeperStorage::AuthID auth{"super", ""}; + sessions_and_auth[session_id].emplace_back(auth); + } + else + { + KeeperStorage::AuthID auth{auth_request.scheme, digest}; + auto & session_ids = sessions_and_auth[session_id]; + if (std::find(session_ids.begin(), session_ids.end(), auth) == session_ids.end()) + sessions_and_auth[session_id].emplace_back(auth); + } + } - return new_deltas; - } - - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override - { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperAuthResponse & auth_response = 
dynamic_cast(*response_ptr); - - if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) - auth_response.error = result; - - return response_ptr; + return { response_ptr, {} }; } }; @@ -1489,6 +1026,7 @@ void KeeperStorage::finalize() class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable { + public: using Creator = std::function; using OpNumToRequest = std::unordered_map; @@ -1501,11 +1039,11 @@ public: KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const { - auto request_it = op_num_to_request.find(zk_request->getOpNum()); - if (request_it == op_num_to_request.end()) + auto it = op_num_to_request.find(zk_request->getOpNum()); + if (it == op_num_to_request.end()) throw DB::Exception("Unknown operation type " + toString(zk_request->getOpNum()), ErrorCodes::LOGICAL_ERROR); - return request_it->second(zk_request); + return it->second(zk_request); } void registerRequest(Coordination::OpNum op_num, Creator creator) @@ -1519,11 +1057,10 @@ private: KeeperStorageRequestProcessorsFactory(); }; -template +template void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory) { - factory.registerRequest( - num, [](const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); + factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); } @@ -1547,66 +1084,13 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() } -void KeeperStorage::preprocessRequest( - const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl) -{ - KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); - - if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special - { - auto & deltas = 
uncommitted_state.deltas; - auto session_ephemerals = ephemerals.find(session_id); - if (session_ephemerals != ephemerals.end()) - { - for (const auto & ephemeral_path : session_ephemerals->second) - { - // For now just add deltas for removing the node - // On commit, ephemerals nodes will be deleted from storage - // and removed from the session - if (uncommitted_state.hasNode(ephemeral_path)) - { - deltas.emplace_back( - parentPath(ephemeral_path).toString(), - new_last_zxid, - UpdateNodeDelta{[ephemeral_path](Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - }}); - - deltas.emplace_back(ephemeral_path, new_last_zxid, RemoveNodeDelta()); - } - } - } - - return; - } - - if (check_acl && !request_processor->checkAuth(*this, session_id, false)) - { - uncommitted_state.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH); - return; - } - - auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); - uncommitted_state.deltas.insert( - uncommitted_state.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); -} - -KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( - const Coordination::ZooKeeperRequestPtr & zk_request, - int64_t session_id, - int64_t time, - std::optional new_last_zxid, - bool check_acl, - bool is_local) +KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl) { KeeperStorage::ResponsesForSessions results; if (new_last_zxid) { if (zxid >= *new_last_zxid) - throw Exception( - ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. 
It's a bug", *new_last_zxid, zxid); zxid = *new_last_zxid; } @@ -1615,22 +1099,26 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { - commit(zxid, session_id); - - for (const auto & delta : uncommitted_state.deltas) + auto it = ephemerals.find(session_id); + if (it != ephemerals.end()) { - if (delta.zxid > zxid) - break; - - if (std::holds_alternative(delta.operation)) + for (const auto & ephemeral_path : it->second) { - auto responses = processWatchesImpl(delta.path, watches, list_watches, Coordination::Event::DELETED); + container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (KeeperStorage::Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + auto base_name = getBaseName(ephemeral_path); + parent.removeChild(base_name); + }); + + container.erase(ephemeral_path); + + auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); results.insert(results.end(), responses.begin(), responses.end()); } + ephemerals.erase(it); } - - std::erase_if(uncommitted_state.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); - clearDeadWatches(session_id); auto auth_it = session_and_auth.find(session_id); if (auth_it != session_and_auth.end()) @@ -1647,7 +1135,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); - auto response = storage_request->process(*this, zxid, session_id, time); + auto [response, _] = storage_request->process(*this, zxid, session_id, time); response->xid = zk_request->xid; response->zxid = getZXID(); @@ -1658,24 +1146,15 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( 
KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); Coordination::ZooKeeperResponsePtr response; - if (is_local) + if (check_acl && !request_processor->checkAuth(*this, session_id)) { - assert(zk_request->isReadRequest()); - if (check_acl && !request_processor->checkAuth(*this, session_id, true)) - { - response = zk_request->makeResponse(); - /// Original ZooKeeper always throws no auth, even when user provided some credentials - response->error = Coordination::Error::ZNOAUTH; - } - else - { - response = request_processor->processLocal(*this, zxid, session_id, time); - } + response = zk_request->makeResponse(); + /// Original ZooKeeper always throws no auth, even when user provided some credentials + response->error = Coordination::Error::ZNOAUTH; } else { - response = request_processor->process(*this, zxid, session_id, time); - std::erase_if(uncommitted_state.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); + std::tie(response, std::ignore) = request_processor->process(*this, zxid, session_id, time); } /// Watches for this requests are added to the watches lists @@ -1683,8 +1162,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( { if (response->error == Coordination::Error::ZOK) { - auto & watches_type - = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList + auto & watches_type = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList ? 
list_watches : watches; @@ -1714,16 +1192,6 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( return results; } -void KeeperStorage::rollbackRequest(int64_t rollback_zxid) -{ - // we can only rollback the last zxid (if there is any) - // if there is a delta with a larger zxid, we have invalid state - const auto last_zxid = uncommitted_state.deltas.back().zxid; - if (!uncommitted_state.deltas.empty() && last_zxid > rollback_zxid) - throw DB::Exception{DB::ErrorCodes::LOGICAL_ERROR, "Invalid state of deltas found while trying to rollback request. Last ZXID ({}) is larger than the requested ZXID ({})", last_zxid, rollback_zxid}; - - std::erase_if(uncommitted_state.deltas, [rollback_zxid](const auto & delta) { return delta.zxid == rollback_zxid; }); -} void KeeperStorage::clearDeadWatches(int64_t session_id) { diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 7d26ae24dd9..ccbddcf6e19 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -1,14 +1,14 @@ #pragma once -#include -#include -#include +#include +#include +#include #include +#include #include #include -#include -#include -#include +#include +#include #include @@ -29,6 +29,7 @@ struct KeeperStorageSnapshot; class KeeperStorage { public: + struct Node { uint64_t acl_id = 0; /// 0 -- no ACL by default @@ -40,18 +41,26 @@ public: Node() : size_bytes(sizeof(Node)) { } /// Object memory size - uint64_t sizeInBytes() const { return size_bytes; } + uint64_t sizeInBytes() const + { + return size_bytes; + } void setData(String new_data); - const auto & getData() const noexcept { return data; } + const auto & getData() const noexcept + { + return data; + } void addChild(StringRef child_path); void removeChild(StringRef child_path); - const auto & getChildren() const noexcept { return children; } - + const auto & getChildren() const noexcept + { + return children; + } private: String data; ChildrenSet children{}; @@ -76,7 +85,10 @@ 
public: std::string scheme; std::string id; - bool operator==(const AuthID & other) const { return scheme == other.scheme && id == other.id; } + bool operator==(const AuthID & other) const + { + return scheme == other.scheme && id == other.id; + } }; using RequestsForSessions = std::vector; @@ -100,146 +112,6 @@ public: /// container. Container container; - // Applying ZooKeeper request to storage consists of two steps: - // - preprocessing which, instead of applying the changes directly to storage, - // generates deltas with those changes, denoted with the request ZXID - // - processing which applies deltas with the correct ZXID to the storage - // - // Delta objects allow us two things: - // - fetch the latest, uncommitted state of an object by getting the committed - // state of that same object from the storage and applying the deltas - // in the same order as they are defined - // - quickly commit the changes to the storage - struct CreateNodeDelta - { - Coordination::Stat stat; - bool is_ephemeral; - bool is_sequental; - Coordination::ACLs acls; - String data; - }; - - struct RemoveNodeDelta - { - int32_t version{-1}; - }; - - struct UpdateNodeDelta - { - std::function update_fn; - int32_t version{-1}; - }; - - struct SetACLDelta - { - Coordination::ACLs acls; - int32_t version{-1}; - }; - - struct ErrorDelta - { - Coordination::Error error; - }; - - struct FailedMultiDelta - { - std::vector error_codes; - }; - - // Denotes end of a subrequest in multi request - struct SubDeltaEnd - { - }; - - struct AddAuthDelta - { - int64_t session_id; - AuthID auth_id; - }; - - using Operation - = std::variant; - - struct Delta - { - Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { } - - Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { } - - Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { } - - String path; - int64_t zxid; - 
Operation operation; - }; - - struct UncommittedState - { - explicit UncommittedState(KeeperStorage & storage_) : storage(storage_) { } - - template - void applyDeltas(StringRef path, const Visitor & visitor) const - { - for (const auto & delta : deltas) - { - if (path.empty() || delta.path == path) - std::visit(visitor, delta.operation); - } - } - - bool hasACL(int64_t session_id, bool is_local, std::function predicate) - { - for (const auto & session_auth : storage.session_and_auth[session_id]) - { - if (predicate(session_auth)) - return true; - } - - if (is_local) - return false; - - - for (const auto & delta : deltas) - { - if (const auto * auth_delta = std::get_if(&delta.operation); - auth_delta && auth_delta->session_id == session_id && predicate(auth_delta->auth_id)) - return true; - } - - return false; - } - - std::shared_ptr getNode(StringRef path); - bool hasNode(StringRef path) const; - Coordination::ACLs getACLs(StringRef path) const; - - std::deque deltas; - KeeperStorage & storage; - }; - - UncommittedState uncommitted_state{*this}; - - Coordination::Error commit(int64_t zxid, int64_t session_id); - - // Create node in the storage - // Returns false if it failed to create the node, true otherwise - // We don't care about the exact failure because we should've caught it during preprocessing - bool createNode( - const std::string & path, - String data, - const Coordination::Stat & stat, - bool is_sequental, - bool is_ephemeral, - Coordination::ACLs node_acls, - int64_t session_id); - - // Remove node in the storage - // Returns false if it failed to remove the node, true otherwise - // We don't care about the exact failure because we should've caught it during preprocessing - bool removeNode(const std::string & path, int32_t version); - - bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local); - /// Mapping session_id -> set of ephemeral nodes paths Ephemerals ephemerals; /// Mapping session_id -> set of watched nodes 
paths @@ -258,12 +130,15 @@ public: /// Currently active watches (node_path -> subscribed sessions) Watches watches; - Watches list_watches; /// Watches for 'list' request (watches on children). + Watches list_watches; /// Watches for 'list' request (watches on children). void clearDeadWatches(int64_t session_id); /// Get current zxid - int64_t getZXID() const { return zxid; } + int64_t getZXID() const + { + return zxid; + } const String superdigest; @@ -287,53 +162,78 @@ public: /// Process user request and return response. /// check_acl = false only when converting data from ZooKeeper. - ResponsesForSessions processRequest( - const Coordination::ZooKeeperRequestPtr & request, - int64_t session_id, - int64_t time, - std::optional new_last_zxid, - bool check_acl = true, - bool is_local = false); - void preprocessRequest( - const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl = true); - void rollbackRequest(int64_t rollback_zxid); + ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl = true); void finalize(); /// Set of methods for creating snapshots /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. - void enableSnapshotMode(size_t up_to_version) { container.enableSnapshotMode(up_to_version); } + void enableSnapshotMode(size_t up_to_version) + { + container.enableSnapshotMode(up_to_version); + + } /// Turn off snapshot mode. - void disableSnapshotMode() { container.disableSnapshotMode(); } + void disableSnapshotMode() + { + container.disableSnapshotMode(); + } - Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); } + Container::const_iterator getSnapshotIteratorBegin() const + { + return container.begin(); + } /// Clear outdated data from internal container. 
- void clearGarbageAfterSnapshot() { container.clearOutdatedNodes(); } + void clearGarbageAfterSnapshot() + { + container.clearOutdatedNodes(); + } /// Get all active sessions - const SessionAndTimeout & getActiveSessions() const { return session_and_timeout; } + const SessionAndTimeout & getActiveSessions() const + { + return session_and_timeout; + } /// Get all dead sessions - std::vector getDeadSessions() const { return session_expiry_queue.getExpiredSessions(); } + std::vector getDeadSessions() const + { + return session_expiry_queue.getExpiredSessions(); + } /// Introspection functions mostly used in 4-letter commands - uint64_t getNodesCount() const { return container.size(); } + uint64_t getNodesCount() const + { + return container.size(); + } - uint64_t getApproximateDataSize() const { return container.getApproximateDataSize(); } + uint64_t getApproximateDataSize() const + { + return container.getApproximateDataSize(); + } - uint64_t getArenaDataSize() const { return container.keyArenaSize(); } + uint64_t getArenaDataSize() const + { + return container.keyArenaSize(); + } uint64_t getTotalWatchesCount() const; - uint64_t getWatchedPathsCount() const { return watches.size() + list_watches.size(); } + uint64_t getWatchedPathsCount() const + { + return watches.size() + list_watches.size(); + } uint64_t getSessionsWithWatchesCount() const; - uint64_t getSessionWithEphemeralNodesCount() const { return ephemerals.size(); } + uint64_t getSessionWithEphemeralNodesCount() const + { + return ephemerals.size(); + } uint64_t getTotalEphemeralNodesCount() const; void dumpWatches(WriteBufferFromOwnString & buf) const; diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.h b/src/Coordination/WriteBufferFromNuraftBuffer.h index c9ca1e2a227..d52049edcff 100644 --- a/src/Coordination/WriteBufferFromNuraftBuffer.h +++ b/src/Coordination/WriteBufferFromNuraftBuffer.h @@ -12,6 +12,7 @@ public: WriteBufferFromNuraftBuffer(); nuraft::ptr getBuffer(); + bool isFinished() 
const { return finalized; } ~WriteBufferFromNuraftBuffer() override; diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 4d1745edc6a..e59c67329ff 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -520,7 +520,6 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*l if (request->getOpNum() == Coordination::OpNum::Multi && hasErrorsInMultiRequest(request)) return true; - storage.preprocessRequest(request, session_id, time, zxid, /* check_acl = */ false); storage.processRequest(request, session_id, time, zxid, /* check_acl = */ false); } } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 2742f48f49e..cf4d1eaf9f2 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1,8 +1,6 @@ #include #include -#include "Common/ZooKeeper/IKeeper.h" -#include "Coordination/KeeperStorage.h" #include "config_core.h" #if USE_NURAFT @@ -1263,7 +1261,6 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint changelog.append(entry); changelog.end_of_append_batch(0, 0); - state_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); state_machine->commit(i, changelog.entry_at(i)->get_buf()); bool snapshot_created = false; if (i % settings->snapshot_distance == 0) @@ -1308,7 +1305,6 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i) { - restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); restore_machine->commit(i, changelog.entry_at(i)->get_buf()); } @@ -1411,7 +1407,6 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) request_c->path = "/hello"; request_c->is_ephemeral = true; auto entry_c = getLogEntryFromZKRequest(0, 1, request_c); - state_machine->pre_commit(1, 
entry_c->get_buf()); state_machine->commit(1, entry_c->get_buf()); const auto & storage = state_machine->getStorage(); @@ -1420,7 +1415,6 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) request_d->path = "/hello"; /// Delete from other session auto entry_d = getLogEntryFromZKRequest(0, 2, request_d); - state_machine->pre_commit(2, entry_d->get_buf()); state_machine->commit(2, entry_d->get_buf()); EXPECT_EQ(storage.ephemerals.size(), 0); @@ -1783,130 +1777,6 @@ TEST_P(CoordinationTest, TestLogGap) EXPECT_EQ(changelog1.next_slot(), 61); } -template -ResponseType getSingleResponse(const auto & responses) -{ - EXPECT_FALSE(responses.empty()); - return dynamic_cast(*responses[0].response); -} - -TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) -{ - using namespace DB; - using namespace Coordination; - - DB::KeeperStorage storage{500, ""}; - - constexpr std::string_view path = "/test"; - - const auto get_committed_data = [&]() -> std::optional - { - auto request = std::make_shared(); - request->path = path; - auto responses = storage.processRequest(request, 0, 0, std::nullopt, true, true); - const auto & get_response = getSingleResponse(responses); - - if (get_response.error != Error::ZOK) - return std::nullopt; - - return get_response.data; - }; - - const auto preprocess_get = [&](int64_t zxid) - { - auto get_request = std::make_shared(); - get_request->path = path; - storage.preprocessRequest(get_request, 0, 0, zxid); - return get_request; - }; - - const auto create_request = std::make_shared(); - create_request->path = path; - create_request->data = "initial_data"; - storage.preprocessRequest(create_request, 0, 0, 1); - storage.preprocessRequest(create_request, 0, 0, 2); - - ASSERT_FALSE(get_committed_data()); - - const auto after_create_get = preprocess_get(3); - - ASSERT_FALSE(get_committed_data()); - - const auto set_request = std::make_shared(); - set_request->path = path; - set_request->data = "new_data"; - storage.preprocessRequest(set_request, 0, 0, 
4); - - const auto after_set_get = preprocess_get(5); - - ASSERT_FALSE(get_committed_data()); - - const auto remove_request = std::make_shared(); - remove_request->path = path; - storage.preprocessRequest(remove_request, 0, 0, 6); - storage.preprocessRequest(remove_request, 0, 0, 7); - - const auto after_remove_get = preprocess_get(8); - - ASSERT_FALSE(get_committed_data()); - - { - const auto responses = storage.processRequest(create_request, 0, 0, 1); - const auto & create_response = getSingleResponse(responses); - ASSERT_EQ(create_response.error, Error::ZOK); - } - - { - const auto responses = storage.processRequest(create_request, 0, 0, 2); - const auto & create_response = getSingleResponse(responses); - ASSERT_EQ(create_response.error, Error::ZNODEEXISTS); - } - - { - const auto responses = storage.processRequest(after_create_get, 0, 0, 3); - const auto & get_response = getSingleResponse(responses); - ASSERT_EQ(get_response.error, Error::ZOK); - ASSERT_EQ(get_response.data, "initial_data"); - } - - ASSERT_EQ(get_committed_data(), "initial_data"); - - { - const auto responses = storage.processRequest(set_request, 0, 0, 4); - const auto & create_response = getSingleResponse(responses); - ASSERT_EQ(create_response.error, Error::ZOK); - } - - { - const auto responses = storage.processRequest(after_set_get, 0, 0, 5); - const auto & get_response = getSingleResponse(responses); - ASSERT_EQ(get_response.error, Error::ZOK); - ASSERT_EQ(get_response.data, "new_data"); - } - - ASSERT_EQ(get_committed_data(), "new_data"); - - { - const auto responses = storage.processRequest(remove_request, 0, 0, 6); - const auto & create_response = getSingleResponse(responses); - ASSERT_EQ(create_response.error, Error::ZOK); - } - - { - const auto responses = storage.processRequest(remove_request, 0, 0, 7); - const auto & create_response = getSingleResponse(responses); - ASSERT_EQ(create_response.error, Error::ZNONODE); - } - - { - const auto responses = 
storage.processRequest(after_remove_get, 0, 0, 8); - const auto & get_response = getSingleResponse(responses); - ASSERT_EQ(get_response.error, Error::ZNONODE); - } - - ASSERT_FALSE(get_committed_data()); -} - INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, CoordinationTest, From 16e839ac71065c27567ed1cda94c98bea1b69d8c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 25 May 2022 14:54:49 +0000 Subject: [PATCH 529/615] add profile events for introspection of part types --- src/Common/ProfileEvents.cpp | 7 +++ src/Interpreters/PartLog.cpp | 3 ++ src/Interpreters/PartLog.h | 3 ++ .../MergeTree/MergeFromLogEntryTask.cpp | 1 + .../MergeTree/MergePlainMergeTreeTask.cpp | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 44 +++++++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 3 +- src/Storages/MergeTree/MergeTreeSink.cpp | 1 + .../MergeTree/ReplicatedMergeTreeSink.cpp | 1 + .../02306_part_types_profile_events.reference | 7 +++ .../02306_part_types_profile_events.sql | 44 +++++++++++++++++++ 12 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02306_part_types_profile_events.reference create mode 100644 tests/queries/0_stateless/02306_part_types_profile_events.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 7f3b9788c1f..9fa47ff959c 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -144,6 +144,13 @@ M(MergeTreeDataWriterBlocks, "Number of blocks INSERTed to MergeTree tables. 
Each block forms a data part of level zero.") \ M(MergeTreeDataWriterBlocksAlreadySorted, "Number of blocks INSERTed to MergeTree tables that appeared to be already sorted.") \ \ + M(InsertedWideParts, "Number of parts inserted in Wide format.") \ + M(InsertedCompactParts, "Number of parts inserted in Compact format.") \ + M(InsertedInMemoryParts, "Number of parts inserted in InMemory format.") \ + M(MergedIntoWideParts, "Number of parts merged into Wide format.") \ + M(MergedIntoCompactParts, "Number of parts merged into Compact format.") \ + M(MergedIntoInMemoryParts, "Number of parts in merged into InMemory format.") \ + \ M(MergeTreeDataProjectionWriterRows, "Number of rows INSERTed to MergeTree tables projection.") \ M(MergeTreeDataProjectionWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables projection.") \ M(MergeTreeDataProjectionWriterCompressedBytes, "Bytes written to filesystem for data INSERTed to MergeTree tables projection.") \ diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index ce9aa0c03d1..6d57f6b7045 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -46,6 +46,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"table", std::make_shared()}, {"part_name", std::make_shared()}, {"partition_id", std::make_shared()}, + {"part_type", std::make_shared()}, {"disk_name", std::make_shared()}, {"path_on_disk", std::make_shared()}, @@ -80,6 +81,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table_name); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); + columns[i++]->insert(part_type.toString()); columns[i++]->insert(disk_name); columns[i++]->insert(path_on_disk); @@ -159,6 +161,7 @@ bool PartLog::addNewParts( elem.part_name = part->name; elem.disk_name = part->volume->getDisk()->getName(); elem.path_on_disk = part->getFullPath(); + elem.part_type = part->getType(); 
elem.bytes_compressed_on_disk = part->getBytesOnDisk(); elem.rows = part->rows_count; diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 7582f6fe9e6..470dce09fa0 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -35,6 +36,8 @@ struct PartLogElement String disk_name; String path_on_disk; + MergeTreeDataPartType part_type; + /// Size of the part UInt64 rows = 0; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 4b8860aa51d..66abe32ac25 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -322,6 +322,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); + storage.incrementMergedPartsProfileEvent(part->getType()); return true; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 0146ce4c7b3..c6a719fbc67 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -117,6 +117,7 @@ void MergePlainMergeTreeTask::finish() new_part = merge_task->getFuture().get(); storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, nullptr); write_part_log({}); + storage.incrementMergedPartsProfileEvent(new_part->getType()); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 50811daa4ab..62c11a31f68 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -93,6 +93,12 @@ namespace ProfileEvents extern const Event DelayedInserts; extern const Event DelayedInsertsMilliseconds; extern const Event DuplicatedInsertedBlocks; + extern const Event InsertedWideParts; + 
extern const Event InsertedCompactParts; + extern const Event InsertedInMemoryParts; + extern const Event MergedIntoWideParts; + extern const Event MergedIntoCompactParts; + extern const Event MergedIntoInMemoryParts; } namespace CurrentMetrics @@ -1716,6 +1722,7 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa part_log_elem.part_name = part->name; part_log_elem.bytes_compressed_on_disk = part->getBytesOnDisk(); part_log_elem.rows = part->rows_count; + part_log_elem.part_type = part->getType(); part_log->add(part_log_elem); } @@ -6190,6 +6197,7 @@ try part_log_elem.path_on_disk = result_part->getFullPath(); part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk(); part_log_elem.rows = result_part->rows_count; + part_log_elem.part_type = result_part->getType(); } part_log_elem.source_part_names.reserve(source_parts.size()); @@ -6755,6 +6763,42 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } +#define FOR_EACH_PART_TYPE(M) \ + M(Wide) \ + M(Compact) \ + M(InMemory) + +#define DECLARE_INCREMENT_EVENT_CASE(Event, Type) \ + case MergeTreeDataPartType::Type: \ + ProfileEvents::increment(ProfileEvents::Event##Type##Parts); \ + break; + +#define DECLARE_INCREMENT_EVENT(value, CASE) \ + switch (value) \ + { \ + FOR_EACH_PART_TYPE(CASE) \ + default: \ + break; \ + } + +void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type) +{ + #define DECLARE_INSERTED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(Inserted, Type) + DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_INSERTED_EVENT_CASE) + #undef DECLARE_INSERTED_EVENT +} + +void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) +{ + #define DECLARE_MERGED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(MergedInto, Type) + DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_MERGED_EVENT_CASE) + #undef 
DECLARE_MERGED_EVENT +} + +#undef FOR_EACH_PART_TYPE +#undef DECLARE_INCREMENT_EVENT_CASE +#undef DECLARE_INCREMENT_EVENT + CurrentlySubmergingEmergingTagger::~CurrentlySubmergingEmergingTagger() { std::lock_guard lock(storage.currently_submerging_emerging_mutex); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index df37cd000e4..1ba09251f6f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1224,6 +1224,8 @@ protected: /// Moves part to specified space, used in ALTER ... MOVE ... queries bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space); + static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type); + static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); private: /// RAII Wrapper for atomic work with currently moving parts diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index bf247074f57..7e08fb0ccfc 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -451,6 +451,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( temp_part.streams.emplace_back(std::move(stream)); } } + auto finalizer = out->finalizePartAsync( new_data_part, data_settings->fsync_after_insert, @@ -460,8 +461,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( temp_part.part = new_data_part; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); - /// out.finish(new_data_part, std::move(written_files), sync_on_insert); - ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes()); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterCompressedBytes, new_data_part->getBytesOnDisk()); diff --git 
a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 93b9f356595..4dc4d62c2a2 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -137,6 +137,7 @@ void MergeTreeSink::finishDelayedChunk() if (storage.renameTempPartAndAdd(part, context->getCurrentTransaction().get(), &storage.increment, nullptr, storage.getDeduplicationLog(), partition.block_dedup_token)) { PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns); + storage.incrementInsertedPartsProfileEvent(part->getType()); /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. storage.background_operations_assignee.trigger(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 126d34bcc1d..de893d59b05 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -261,6 +261,7 @@ void ReplicatedMergeTreeSink::finishDelayedChunk(zkutil::ZooKeeperPtr & zookeepe /// Set a special error code if the block is duplicate int error = (deduplicate && part->is_duplicate) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns, ExecutionStatus(error)); + storage.incrementInsertedPartsProfileEvent(part->getType()); } catch (...) 
{ diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.reference b/tests/queries/0_stateless/02306_part_types_profile_events.reference new file mode 100644 index 00000000000..7b5495f39fe --- /dev/null +++ b/tests/queries/0_stateless/02306_part_types_profile_events.reference @@ -0,0 +1,7 @@ +3 1 2 +2 1 1 +Compact +Compact +Wide +Compact 1 +Wide 1 diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.sql b/tests/queries/0_stateless/02306_part_types_profile_events.sql new file mode 100644 index 00000000000..0ec13bc3827 --- /dev/null +++ b/tests/queries/0_stateless/02306_part_types_profile_events.sql @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS t_parts_profile_events; + +CREATE TABLE t_parts_profile_events (a UInt32) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 10, min_bytes_for_wide_part = 0; + +SYSTEM STOP MERGES t_parts_profile_events; + +SET log_comment = '02306_part_types_profile_events'; + +INSERT INTO t_parts_profile_events VALUES (1); +INSERT INTO t_parts_profile_events VALUES (1); + +SYSTEM START MERGES t_parts_profile_events; +OPTIMIZE TABLE t_parts_profile_events FINAL; +SYSTEM STOP MERGES t_parts_profile_events; + +INSERT INTO t_parts_profile_events SELECT number FROM numbers(20); + +SYSTEM START MERGES t_parts_profile_events; +OPTIMIZE TABLE t_parts_profile_events FINAL; +SYSTEM STOP MERGES t_parts_profile_events; + +SYSTEM FLUSH LOGS; + +SELECT count(), sum(ProfileEvents['InsertedWideParts']), sum(ProfileEvents['InsertedCompactParts']) + FROM system.query_log WHERE has(databases, currentDatabase()) + AND log_comment = '02306_part_types_profile_events' + AND query ILIKE 'INSERT INTO%' AND type = 'QueryFinish'; + +SELECT count(), sum(ProfileEvents['MergedIntoWideParts']), sum(ProfileEvents['MergedIntoCompactParts']) + FROM system.query_log WHERE has(databases, currentDatabase()) + AND log_comment = '02306_part_types_profile_events' + AND query ILIKE 'OPTIMIZE TABLE%' AND type = 'QueryFinish'; + +SELECT 
part_type FROM system.part_log WHERE database = currentDatabase() + AND table = 't_parts_profile_events' AND event_type = 'NewPart' + ORDER BY event_time_microseconds; + +SELECT part_type, count() > 0 FROM system.part_log WHERE database = currentDatabase() + AND table = 't_parts_profile_events' AND event_type = 'MergeParts' + GROUP BY part_type; + +DROP TABLE t_parts_profile_events; From a33c7ce648ea3dc70b4aff08b65bdd2dfa5838d5 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 25 May 2022 22:58:47 +0800 Subject: [PATCH 530/615] fix --- src/Common/FileCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index c28794ab23e..c2af2e07099 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -935,7 +935,7 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add( const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard & /* cache_lock */) { #ifndef NDEBUG - for (const auto & [entry_key, entry_offset, _, __] : queue) + for (const auto & [entry_key, entry_offset, entry_size, entry_hits] : queue) { if (entry_key == key && entry_offset == offset) throw Exception( From 7b681fa8ac3092ae31ab0e236229236f2336f0e6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 25 May 2022 17:15:23 +0200 Subject: [PATCH 531/615] Fixing build. 
--- src/Interpreters/Context.cpp | 2 +- src/Interpreters/Context.h | 40 +++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e0f7645b48e..3380ac78da0 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1079,7 +1079,7 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query factories info"); - std::lock_guard lock(query_factories_info_mutex); + std::lock_guard lock(query_factories_info.mutex); switch (factory_type) { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index cf808dff582..def63b4fba3 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -291,6 +291,43 @@ private: /// Record names of created objects of factories (for testing, etc) struct QueryFactoriesInfo { + QueryFactoriesInfo() = default; + + QueryFactoriesInfo(const QueryFactoriesInfo & rhs) + { + std::lock_guard lock(rhs.mutex); + aggregate_functions = rhs.aggregate_functions; + aggregate_function_combinators = rhs.aggregate_function_combinators; + database_engines = rhs.database_engines; + data_type_families = rhs.data_type_families; + dictionaries = rhs.dictionaries; + formats = rhs.formats; + functions = rhs.functions; + storages = rhs.storages; + table_functions = rhs.table_functions; + } + + QueryFactoriesInfo(QueryFactoriesInfo && rhs) = delete; + + QueryFactoriesInfo & operator=(QueryFactoriesInfo rhs) + { + swap(rhs); + return *this; + } + + void swap(QueryFactoriesInfo & rhs) + { + std::swap(aggregate_functions, rhs.aggregate_functions); + std::swap(aggregate_function_combinators, rhs.aggregate_function_combinators); + std::swap(database_engines, rhs.database_engines); + std::swap(data_type_families, rhs.data_type_families); + std::swap(dictionaries, rhs.dictionaries); + 
std::swap(formats, rhs.formats); + std::swap(functions, rhs.functions); + std::swap(storages, rhs.storages); + std::swap(table_functions, rhs.table_functions); + } + std::unordered_set aggregate_functions; std::unordered_set aggregate_function_combinators; std::unordered_set database_engines; @@ -300,11 +337,12 @@ private: std::unordered_set functions; std::unordered_set storages; std::unordered_set table_functions; + + std::mutex mutex; }; /// Needs to be changed while having const context in factories methods mutable QueryFactoriesInfo query_factories_info; - mutable std::mutex query_factories_info_mutex; /// TODO: maybe replace with temporary tables? StoragePtr view_source; /// Temporary StorageValues used to generate alias columns for materialized views From 1f9b1cf7260ba4b6322ae16ba31241d447db6cc6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 25 May 2022 18:59:46 +0200 Subject: [PATCH 532/615] Fixing build. --- src/Interpreters/Context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index def63b4fba3..807e9b51350 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -338,7 +338,7 @@ private: std::unordered_set storages; std::unordered_set table_functions; - std::mutex mutex; + mutable std::mutex mutex; }; /// Needs to be changed while having const context in factories methods From 97c5a4c725b074006b8074b2012aa6f184c91d29 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 25 May 2022 20:04:15 +0300 Subject: [PATCH 533/615] Update SECURITY.md --- SECURITY.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 6c03a6bb945..426559a0439 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -25,9 +25,11 @@ The following versions of ClickHouse server are currently being supported with s | 21.10 | :x: | | 21.11 | :x: | | 21.12 | :x: | -| 22.1 | ✅ | -| 22.2 | ✅ | +| 22.1 | :x: | +| 22.2 | :x: | | 22.3 | ✅ | 
+| 22.4 | ✅ | +| 22.5 | ✅ | ## Reporting a Vulnerability From df84be9b43a268d2f04e46a3cf21821c8896edde Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 25 May 2022 20:04:20 +0300 Subject: [PATCH 534/615] Update SECURITY.md --- SECURITY.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 6c03a6bb945..426559a0439 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -25,9 +25,11 @@ The following versions of ClickHouse server are currently being supported with s | 21.10 | :x: | | 21.11 | :x: | | 21.12 | :x: | -| 22.1 | ✅ | -| 22.2 | ✅ | +| 22.1 | :x: | +| 22.2 | :x: | | 22.3 | ✅ | +| 22.4 | ✅ | +| 22.5 | ✅ | ## Reporting a Vulnerability From a813f5996e95e424193265bb090ef7a402497d6e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 20 Feb 2022 22:12:25 +0300 Subject: [PATCH 535/615] Fix converting types for UNION queries (may produce LOGICAL_ERROR) CI founds [1]: 2022.02.20 15:14:23.969247 [ 492 ] {} BaseDaemon: (version 22.3.1.1, build id: 6082C357CFA6FF99) (from thread 472) (query_id: a5187ff9-962a-4e7c-86f6-8d48850a47d6) (query: SELECT 0., round(avgWeighted(x, y)) FROM (SELECT toDate(toDate('214748364.8', '-922337203.6854775808', '-0.1', NULL) - NULL, 10.000100135803223, '-2147483647'), 255 AS x, -2147483647 AS y UNION ALL SELECT y, NULL AS x, 2147483646 AS y)) Received signal Aborted (6) [1]: https://s3.amazonaws.com/clickhouse-test-reports/0/26d0e5438c86e52a145aaaf4cb523c399989a878/fuzzer_astfuzzerdebug,actions//report.html The problem is that subqueries returns different headers: - first query -- x, y - second query -- y, x v2: Make order of columns strict only for UNION https://s3.amazonaws.com/clickhouse-test-reports/34775/9cc8c01a463d18c471853568b2f0af659a4e643f/stateless_tests__address__actions__[2/2].html Fixes: 00597_push_down_predicate_long Signed-off-by: Azat Khuzhin --- .../InterpreterSelectWithUnionQuery.cpp | 4 ++ src/Interpreters/SelectQueryOptions.h | 8 ++++ src/Interpreters/TreeRewriter.cpp | 
30 ++++++++++++- .../02227_union_match_by_name.reference | 44 +++++++++++++++++++ .../0_stateless/02227_union_match_by_name.sql | 3 ++ 5 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02227_union_match_by_name.reference create mode 100644 tests/queries/0_stateless/02227_union_match_by_name.sql diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 7506c3013cb..94ebfd73513 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -46,6 +46,10 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( if (!num_children) throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + /// This is required for UNION to match headers correctly. + if (num_children > 1) + options.reorderColumns(); + /// Note that we pass 'required_result_column_names' to first SELECT. /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, /// because names could be different. diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 31ed9d8c686..b0183e2761b 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -31,6 +31,8 @@ struct SelectQueryOptions bool only_analyze = false; bool modify_inplace = false; bool remove_duplicates = false; + /// This is required for UNION to match headers correctly. + bool reorder_columns_as_required_header = false; bool ignore_quota = false; bool ignore_limits = false; /// This flag is needed to analyze query ignoring table projections. 
@@ -97,6 +99,12 @@ struct SelectQueryOptions return *this; } + SelectQueryOptions & reorderColumns(bool value = true) + { + reorder_columns_as_required_header = value; + return *this; + } + SelectQueryOptions & noSubquery() { subquery_depth = 0; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index c90421d6f4f..11a392f3adf 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -422,7 +422,7 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query) /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. /// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible. /// Also remove all INTERPOLATE columns which are not in SELECT anymore. -void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups) +void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header) { ASTs & elements = select_query->select()->children; @@ -453,6 +453,29 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const NameSet remove_columns; + /// Resort columns according to required_result_columns. 
+ if (reorder_columns_as_required_header && !required_result_columns.empty()) + { + std::unordered_map name_pos; + { + size_t pos = 0; + for (const auto & name : required_result_columns) + name_pos[name] = pos++; + } + std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs) + { + String lhs_name = lhs->getAliasOrColumnName(); + String rhs_name = rhs->getAliasOrColumnName(); + size_t lhs_pos = name_pos.size(); + size_t rhs_pos = name_pos.size(); + if (auto it = name_pos.find(lhs_name); it != name_pos.end()) + lhs_pos = it->second; + if (auto it = name_pos.find(rhs_name); it != name_pos.end()) + rhs_pos = it->second; + return lhs_pos < rhs_pos; + }); + } + for (const auto & elem : elements) { String name = elem->getAliasOrColumnName(); @@ -465,6 +488,8 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const } else if (select_query->distinct || hasArrayJoin(elem)) { + /// ARRAY JOIN cannot be optimized out since it may change number of rows, + /// so as DISTINCT. new_elements.push_back(elem); } else @@ -1135,6 +1160,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( size_t subquery_depth = select_options.subquery_depth; bool remove_duplicates = select_options.remove_duplicates; + bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header; const auto & settings = getContext()->getSettingsRef(); @@ -1186,7 +1212,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. 
- removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); + removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header); /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze); diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference new file mode 100644 index 00000000000..72c4987a3d2 --- /dev/null +++ b/tests/queries/0_stateless/02227_union_match_by_name.reference @@ -0,0 +1,44 @@ +-- { echo } +EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); +Expression (Projection) +Header: avgWeighted(x, y) Nullable(Float64) + Expression (Before ORDER BY) + Header: avgWeighted(x, y) Nullable(Float64) + Aggregating + Header: avgWeighted(x, y) Nullable(Float64) + Expression (Before GROUP BY) + Header: x Nullable(UInt8) + y UInt8 + Union + Header: x Nullable(UInt8) + y UInt8 + Expression (Conversion before UNION) + Header: x Nullable(UInt8) + y UInt8 + Expression (Projection) + Header: x UInt8 + y UInt8 + Expression (Before ORDER BY) + Header: 255 UInt8 + 1 UInt8 + dummy UInt8 + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (Conversion before UNION) + Header: x Nullable(UInt8) + y UInt8 + Expression (Projection) + Header: x Nullable(Nothing) + y UInt8 + Expression (Before ORDER BY) + Header: NULL Nullable(Nothing) + 1 UInt8 + dummy UInt8 + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL 
SELECT y, NULL AS x, 1 AS y); +255 diff --git a/tests/queries/0_stateless/02227_union_match_by_name.sql b/tests/queries/0_stateless/02227_union_match_by_name.sql new file mode 100644 index 00000000000..cc0ab8ba5aa --- /dev/null +++ b/tests/queries/0_stateless/02227_union_match_by_name.sql @@ -0,0 +1,3 @@ +-- { echo } +EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); +SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); From 620ab399c99ecce92848d1c3101cbd60572393ad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 25 May 2022 20:23:24 +0200 Subject: [PATCH 536/615] Update docs/en/operations/clickhouse-keeper.md --- docs/en/operations/clickhouse-keeper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 7dbe0601343..e4d10967bc8 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -13,7 +13,7 @@ ZooKeeper is one of the first well-known open-source coordination systems. It's By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but the `clickhouse-keeper-converter` tool enables the conversion of ZooKeeper data to ClickHouse Keeper snapshots. The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible. -ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. 
ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. The digest authentication scheme uses the pair `username:password`, the password is encoded in Base64. +ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth` and `digest`. The digest authentication scheme uses the pair `username:password`, the password is encoded in Base64. :::note External integrations are not supported. From 4482da9eb62fca7aeb128a0f9c72f61a0c3ccd89 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 25 May 2022 21:59:31 +0300 Subject: [PATCH 537/615] Update greatCircleDistance.cpp --- src/Functions/greatCircleDistance.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 7935510ad41..ab8b37af960 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -272,7 +272,8 @@ private: dst_data.resize(input_rows_count); auto arguments_copy = arguments; - for (auto & argument : arguments_copy) { + for (auto & argument : arguments_copy) + { argument.column = argument.column->convertToFullColumnIfConst(); argument.column = castColumn(argument, result_type); argument.type = result_type; From c7b16065e14b4e7f5c4179bc0262407ae4cbec6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 25 May 2022 21:47:05 +0200 Subject: [PATCH 538/615] Merge with master --- src/Common/ErrorCodes.cpp | 2 +- .../ObjectStorages/DiskObjectStorage.cpp | 11 +++++ src/Disks/ObjectStorages/DiskObjectStorage.h | 4 ++ .../DiskObjectStorageMetadataHelper.cpp | 41 ++++++++++++++--- .../DiskObjectStorageMetadataHelper.h | 45 ++++++++++++++----- 
src/Disks/ObjectStorages/S3/diskSettings.cpp | 9 ++-- .../ObjectStorages/S3/registerDiskS3.cpp | 1 + src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/StorageS3.cpp | 6 ++- src/Storages/StorageS3.h | 7 ++- 10 files changed, 103 insertions(+), 25 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ce457cda1f2..973dde10756 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -627,8 +627,8 @@ M(656, MEILISEARCH_EXCEPTION) \ M(657, UNSUPPORTED_MEILISEARCH_TYPE) \ M(658, MEILISEARCH_MISSING_SOME_COLUMNS) \ - M(659, HDFS_ERROR) \ M(659, UNKNOWN_STATUS_OF_TRANSACTION) \ + M(660, HDFS_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index c235e1a864a..65b1d5a5bdf 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -622,6 +622,17 @@ void DiskObjectStorage::restoreMetadataIfNeeded(const Poco::Util::AbstractConfig } } +void DiskObjectStorage::syncRevision(UInt64 revision) +{ + metadata_helper->syncRevision(revision); +} + +UInt64 DiskObjectStorage::getRevision() const +{ + return metadata_helper->getRevision(); +} + + DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const { if (i != 0) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 9a60a7ad25e..d89c00a5567 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -172,6 +172,10 @@ public: void restoreMetadataIfNeeded(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void onFreeze(const String & path) override; + + void syncRevision(UInt64 revision) override; + + UInt64 getRevision() const override; private: const String name; const String remote_fs_root_path; diff --git 
a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp index a7e34f7ccd4..b09debf9a43 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -23,7 +23,7 @@ static String revisionToString(UInt64 revision) void DiskObjectStorageMetadataHelper::createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const { - const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + "-" + operation_name; + const String path = disk->remote_fs_root_path + "operations/r" + revisionToString(revision) + operation_log_suffix + "-" + operation_name; auto buf = disk->object_storage->writeObject(path, WriteMode::Rewrite, metadata); buf->write('0'); buf->finalize(); @@ -300,15 +300,45 @@ static String shrinkKey(const String & path, const String & key) static std::tuple extractRevisionAndOperationFromKey(const String & key) { String revision_str; + String suffix; String operation; - /// Key has format: ../../r{revision}-{operation} - static const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; + /// Key has format: ../../r{revision}(-{hostname})-{operation} + static const re2::RE2 key_regexp{".*/r(\\d+)(-[\\w\\d\\-\\.]+)?-(\\w+)$"}; - re2::RE2::FullMatch(key, key_regexp, &revision_str, &operation); + re2::RE2::FullMatch(key, key_regexp, &revision_str, &suffix, &operation); return {(revision_str.empty() ? 
0 : static_cast(std::bitset<64>(revision_str).to_ullong())), operation}; } +void DiskObjectStorageMetadataHelper::moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata) +{ + if (disk->exists(to_path)) + { + if (send_metadata) + { + auto revision = ++revision_counter; + const ObjectAttributes object_metadata { + {"from_path", from_path}, + {"to_path", to_path} + }; + createFileOperationObject("rename", revision, object_metadata); + } + if (disk->isDirectory(from_path)) + { + for (auto it = disk->iterateDirectory(from_path); it->isValid(); it->next()) + moveRecursiveOrRemove(it->path(), fs::path(to_path) / it->name(), false); + } + else + { + disk->removeFile(from_path); + } + } + else + { + disk->moveFile(from_path, to_path, send_metadata); + } +} + void DiskObjectStorageMetadataHelper::restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information) { LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); @@ -385,7 +415,6 @@ void DiskObjectStorageMetadataHelper::processRestoreFiles(IObjectStorage * sourc else continue; - disk->createDirectories(directoryPath(path)); auto relative_key = shrinkKey(source_path, key); @@ -457,7 +486,7 @@ void DiskObjectStorageMetadataHelper::restoreFileOperations(IObjectStorage * sou auto to_path = object_attributes["to_path"]; if (disk->exists(from_path)) { - disk->moveFile(from_path, to_path, send_metadata); + moveRecursiveOrRemove(from_path, to_path, send_metadata); LOG_TRACE(disk->log, "Revision {}. 
Restored rename {} -> {}", revision, from_path, to_path); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h index 89153e4a39c..58ef8405a13 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -25,9 +26,37 @@ public: DiskObjectStorageMetadataHelper(DiskObjectStorage * disk_, ReadSettings read_settings_) : disk(disk_) , read_settings(std::move(read_settings_)) + , operation_log_suffix("-" + getFQDNOrHostName()) { } + /// Most important method, called on DiskObjectStorage startup + void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); + + void syncRevision(UInt64 revision) + { + UInt64 local_revision = revision_counter.load(); + while ((revision > local_revision) && revision_counter.compare_exchange_weak(local_revision, revision)); + } + + UInt64 getRevision() const + { + return revision_counter.load(); + } + + static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); + + void migrateToRestorableSchema(); + + void findLastRevision(); + + void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + + /// Version with possibility to backup-restore metadata. 
+ static constexpr int RESTORABLE_SCHEMA_VERSION = 1; + + std::atomic revision_counter = 0; +private: struct RestoreInformation { UInt64 revision = LATEST_REVISION; @@ -38,32 +67,24 @@ public: using Futures = std::vector>; - void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectAttributes & metadata) const; + /// Move file or files in directory when possible and remove files in other case + /// to restore by S3 operation log with same operations from different replicas + void moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata); - void findLastRevision(); - - static int readSchemaVersion(IObjectStorage * object_storage, const String & source_path); void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; void migrateToRestorableSchemaRecursive(const String & path, Futures & results); - void migrateToRestorableSchema(); - - /// Most important method, called on DiskObjectStorage startup - void restore(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); void readRestoreInformation(RestoreInformation & restore_information); void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); void processRestoreFiles(IObjectStorage * source_object_storage, const String & source_path, const std::vector & keys) const; void restoreFileOperations(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); - std::atomic revision_counter = 0; inline static const String RESTORE_FILE_NAME = "restore"; /// Object contains information about schema version. inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; - /// Version with possibility to backup-restore metadata. 
- static constexpr int RESTORABLE_SCHEMA_VERSION = 1; /// Directories with data. const std::vector data_roots {"data", "store"}; @@ -72,6 +93,8 @@ public: ObjectStoragePtr object_storage_from_another_namespace; ReadSettings read_settings; + + String operation_log_suffix; }; } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 145bb4a3d66..79a7978c53e 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -14,9 +14,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -149,4 +149,5 @@ std::unique_ptr getClient(const Poco::Util::AbstractConfigura } } ->>>>>> master:src/Disks/S3/registerDiskS3.cpp + +#endif diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 9c9c76ad451..d7e82ef3392 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -10,6 +10,7 @@ #include +#include #include #include diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 78896d74d09..0c834564ec4 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -518,7 +518,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); - UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); + UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); if (revision) disk->syncRevision(revision); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 7960b7dfac0..1dbf7b36f1b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -239,7 +239,11 @@ private: }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr 
query, const Block & virtual_header, ContextPtr context) + const Aws::S3::S3Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context) : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context)) { } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index ef16982ba58..b246de18bfb 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -36,7 +36,12 @@ public: { public: DisclosedGlobIterator( - const Aws::S3::S3Client & client_, const S3::URI & globbed_uri_, ASTPtr query, const Block & virtual_header, ContextPtr context); + const Aws::S3::S3Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context); + String next(); private: From 47820c216d0798daa1ebdd29bb0ed01f53b418fa Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 25 May 2022 23:10:33 +0300 Subject: [PATCH 539/615] Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part" --- src/Storages/MergeTree/DataPartsExchange.cpp | 41 +++++++------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f6d53979663..620466b8035 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -470,28 +470,29 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - std::unique_ptr in = std::make_unique( + PooledReadWriteBufferFromHTTP in{ uri, Poco::Net::HTTPRequest::HTTP_POST, - nullptr, + {}, timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, 0, /* no redirects */ - static_cast(data_settings->replicated_max_parallel_fetches_for_host)); + data_settings->replicated_max_parallel_fetches_for_host + }; - int server_protocol_version = 
parse(in->getResponseCookie("server_protocol_version", "0")); + int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); ReservationPtr reservation; size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - readBinary(sum_files_size, *in); + readBinary(sum_files_size, in); if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { IMergeTreeDataPart::TTLInfos ttl_infos; String ttl_infos_string; - readBinary(ttl_infos_string, *in); + readBinary(ttl_infos_string, in); ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); @@ -528,13 +529,13 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( String part_type = "Wide"; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, *in); + readStringBinary(part_type, in); UUID part_uuid = UUIDHelpers::Nil; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) - readUUIDText(part_uuid, *in); + readUUIDText(part_uuid, in); - String remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); + String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); if (!remote_fs_metadata.empty()) { if (!try_zero_copy) @@ -548,7 +549,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( try { - return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, *in, throttler); + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); } catch (const Exception & e) { @@ -556,18 +557,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( throw; LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); - - /// It's important to release session from HTTP pool. 
Otherwise it's possible to get deadlock - /// on http pool. - try - { - in.reset(); - } - catch (...) - { - tryLogCurrentException(log); - } - /// Try again but without zero-copy return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); @@ -581,16 +570,16 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( part_info.partition_id, part_name, new_part_path, replica_path, uri, to_detached, sum_files_size); - in->setNextCallback(ReplicatedFetchReadCallback(*entry)); + in.setNextCallback(ReplicatedFetchReadCallback(*entry)); size_t projections = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - readBinary(projections, *in); + readBinary(projections, in); MergeTreeData::DataPart::Checksums checksums; return part_type == "InMemory" - ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, *in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, *in, projections, checksums, throttler); + ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( From 5df14cd9567b235c28a31358b56ed068e38af19b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 25 May 2022 00:28:28 +0200 Subject: [PATCH 540/615] Cast arguments to result type to avoid int overflow --- src/Functions/array/arrayDistance.cpp | 20 ++++++++++---------- src/Functions/array/arrayNorm.cpp | 14 +++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 2ef1cab4647..87b8f1979f7 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -28,8 +28,8 @@ struct L1Distance FloatType sum = 0; }; - template - static void accumulate(State & state, FirstArgType x, SecondArgType y) + template + static void accumulate(State & state, ResultType x, ResultType y) { state.sum += fabs(x - y); } @@ -51,8 +51,8 @@ struct L2Distance FloatType sum = 0; }; - template - static void accumulate(State & state, FirstArgType x, SecondArgType y) + template + static void accumulate(State & state, ResultType x, ResultType y) { state.sum += (x - y) * (x - y); } @@ -74,8 +74,8 @@ struct LinfDistance FloatType dist = 0; }; - template - static void accumulate(State & state, FirstArgType x, SecondArgType y) + template + static void accumulate(State & state, ResultType x, ResultType y) { state.dist = fmax(state.dist, fabs(x - y)); } @@ -98,8 +98,8 @@ struct CosineDistance FloatType y_squared = 0; }; - template - static void accumulate(State & state, FirstArgType x, SecondArgType y) + template + static void accumulate(State & state, ResultType x, ResultType y) { state.dot_prod += x * y; state.x_squared += 
x * x; @@ -288,7 +288,7 @@ private: typename Kernel::template State state; for (; prev < off; ++prev) { - Kernel::accumulate(state, data_x[prev], data_y[prev]); + Kernel::template accumulate(state, data_x[prev], data_y[prev]); } result_data[row] = Kernel::finalize(state); row++; @@ -337,7 +337,7 @@ private: typename Kernel::template State state; for (size_t i = 0; prev < off; ++i, ++prev) { - Kernel::accumulate(state, data_x[i], data_y[prev]); + Kernel::template accumulate(state, data_x[i], data_y[prev]); } result_data[row] = Kernel::finalize(state); row++; diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 587c65a49ca..19ca43d34f5 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -21,8 +21,8 @@ struct L1Norm { static inline String name = "L1"; - template - inline static ResultType accumulate(ResultType result, ArgumentType value) + template + inline static ResultType accumulate(ResultType result, ResultType value) { return result + fabs(value); } @@ -38,8 +38,8 @@ struct L2Norm { static inline String name = "L2"; - template - inline static ResultType accumulate(ResultType result, ArgumentType value) + template + inline static ResultType accumulate(ResultType result, ResultType value) { return result + value * value; } @@ -56,8 +56,8 @@ struct LinfNorm { static inline String name = "Linf"; - template - inline static ResultType accumulate(ResultType result, ArgumentType value) + template + inline static ResultType accumulate(ResultType result, ResultType value) { return fmax(result, fabs(value)); } @@ -189,7 +189,7 @@ private: Float64 result = 0; for (; prev < off; ++prev) { - result = Kernel::accumulate(result, data[prev]); + result = Kernel::template accumulate(result, data[prev]); } result_data[row] = Kernel::finalize(result); row++; From b065839f440495c5f182de34ea02339c16d3e127 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 25 
May 2022 16:29:12 +0200 Subject: [PATCH 541/615] always return Float64 --- src/Functions/array/arrayDistance.cpp | 4 --- src/Functions/array/arrayNorm.cpp | 4 --- .../02282_array_distance.reference | 29 ++++++++++--------- .../0_stateless/02282_array_distance.sql | 8 +++++ .../0_stateless/02283_array_norm.reference | 19 ++++++------ .../queries/0_stateless/02283_array_norm.sql | 7 +++++ 6 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 87b8f1979f7..87e53193139 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -145,7 +145,6 @@ public: case TypeIndex::Int16: case TypeIndex::Int32: case TypeIndex::Float32: - return std::make_shared(); case TypeIndex::UInt64: case TypeIndex::Int64: case TypeIndex::Float64: @@ -164,9 +163,6 @@ public: { switch (result_type->getTypeId()) { - case TypeIndex::Float32: - return executeWithResultType(arguments, input_rows_count); - break; case TypeIndex::Float64: return executeWithResultType(arguments, input_rows_count); break; diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 19ca43d34f5..b166d1f2e7e 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -102,7 +102,6 @@ public: case TypeIndex::Int16: case TypeIndex::Int32: case TypeIndex::Float32: - return std::make_shared(); case TypeIndex::UInt64: case TypeIndex::Int64: case TypeIndex::Float64: @@ -125,9 +124,6 @@ public: switch (result_type->getTypeId()) { - case TypeIndex::Float32: - return executeWithResultType(*arr, type, input_rows_count); - break; case TypeIndex::Float64: return executeWithResultType(*arr, type, input_rows_count); break; diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference index 2fd6c66c817..b7db2dceee8 100644 --- a/tests/queries/0_stateless/02282_array_distance.reference 
+++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -1,21 +1,22 @@ 6 -3.7416575 +3.7416573867739413 3 -0.002585097 +0.00258509695694209 \N nan +0 0 0 0 12 14 21 -7.071068 -9.165152 -12.124355 +7.0710678118654755 +9.16515138991168 +12.12435565298214 2 5 4 -0.16847816 -0.35846698 -0.0741799 +0.16847815937970012 +0.3584669721282153 +0.07417990022744847 6 8 9 @@ -28,12 +29,12 @@ nan 2 2 1347.4008312302617 3 1 214.35251339790725 3 2 1342.8856987845243 -1 1 218.74643 -1 2 1348.2118 -2 1 219.28064 -2 2 1347.4009 -3 1 214.35251 -3 2 1342.8857 +1 1 218.74642854227358 +1 2 1348.2117786164013 +2 1 219.28064210048274 +2 2 1347.4008312302617 +3 1 214.35251339790725 +3 2 1342.8856987845243 1 1 218.74642854227358 1 2 1348.2117786164013 2 1 219.28064210048274 diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql index 04de01d7d66..89214c57441 100644 --- a/tests/queries/0_stateless/02282_array_distance.sql +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -6,6 +6,14 @@ SELECT arrayCosineDistance([1, 2, 3], [3, 5, 7]); SELECT arrayL2Distance([1, 2, 3], NULL); SELECT arrayCosineDistance([1, 2, 3], [0, 0, 0]); +-- Overflows +WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -573412550, -552499284, 2096941042], 'Array(Int32)') AS a +SELECT + arrayL1Distance(a,a), + arrayL2Distance(a,a), + arrayLinfDistance(a,a), + arrayCosineDistance(a, a); + DROP TABLE IF EXISTS vec1; DROP TABLE IF EXISTS vec2; DROP TABLE IF EXISTS vec2f; diff --git a/tests/queries/0_stateless/02283_array_norm.reference b/tests/queries/0_stateless/02283_array_norm.reference index 6dd6b79e6d9..68dbce0b436 100644 --- a/tests/queries/0_stateless/02283_array_norm.reference +++ b/tests/queries/0_stateless/02283_array_norm.reference @@ -1,17 +1,18 @@ 6 7.0710678118654755 2 +10803059573 4234902446.7343364 2096941042 1 5 2 2 -3 5.196152 -4 0 -1 11 -2 11 -3 11 -4 11 -1 5 -2 2 -3 5.196152 +3 5.196152422706632 +4 0 +1 11 
+2 11 +3 11 +4 11 +1 5 +2 2 +3 5.196152422706632 4 0 1 11 2 11 diff --git a/tests/queries/0_stateless/02283_array_norm.sql b/tests/queries/0_stateless/02283_array_norm.sql index e11caea7cc1..a759a47d79c 100644 --- a/tests/queries/0_stateless/02283_array_norm.sql +++ b/tests/queries/0_stateless/02283_array_norm.sql @@ -2,6 +2,13 @@ SELECT arrayL1Norm([1, 2, 3]); SELECT arrayL2Norm([3., 4., 5.]); SELECT arrayLinfNorm([0, 0, 2]); +-- Overflows +WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -573412550, -552499284, 2096941042], 'Array(Int32)') AS a +SELECT + arrayL1Norm(a), + arrayL2Norm(a), + arrayLinfNorm(a); + DROP TABLE IF EXISTS vec1; DROP TABLE IF EXISTS vec1f; DROP TABLE IF EXISTS vec1d; From 168b47d0adf9506d861fe62c087dcecadec4f7ca Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 25 May 2022 18:09:03 +0200 Subject: [PATCH 542/615] Use same norm and distance function names for tuples and arrays --- src/Functions/array/arrayDistance.cpp | 14 +- src/Functions/array/arrayNorm.cpp | 12 +- .../array/registerFunctionsArray.cpp | 4 - src/Functions/vectorFunctions.cpp | 136 ++++++++++++++++-- tests/performance/norm_distance.xml | 6 +- .../0_stateless/02282_array_distance.sql | 42 +++--- .../queries/0_stateless/02283_array_norm.sql | 26 ++-- 7 files changed, 174 insertions(+), 66 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 87e53193139..7c1cddf4435 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -167,7 +167,7 @@ public: return executeWithResultType(arguments, input_rows_count); break; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); } } @@ -343,12 +343,10 @@ private: }; -void registerFunctionArrayDistance(FunctionFactory & factory) -{ - 
factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); -} +/// These functions are used by TupleOrArrayFunction +FunctionPtr createFunctionArrayL1Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } +FunctionPtr createFunctionArrayL2Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } +FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } +FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index b166d1f2e7e..b3b5aff7063 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -128,7 +128,7 @@ public: return executeWithResultType(*arr, type, input_rows_count); break; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); } } @@ -194,11 +194,9 @@ private: } }; -void registerFunctionArrayNorm(FunctionFactory & factory) -{ - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); -} +/// These functions are used by TupleOrArrayFunction +FunctionPtr createFunctionArrayL1Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } +FunctionPtr createFunctionArrayL2Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } +FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } } diff --git a/src/Functions/array/registerFunctionsArray.cpp b/src/Functions/array/registerFunctionsArray.cpp index e2e8b08fbf2..3bb27cbadf9 100644 --- a/src/Functions/array/registerFunctionsArray.cpp +++ b/src/Functions/array/registerFunctionsArray.cpp @@ -37,8 +37,6 @@ void 
registerFunctionArrayAUC(FunctionFactory &); void registerFunctionArrayReduceInRanges(FunctionFactory &); void registerFunctionMapOp(FunctionFactory &); void registerFunctionMapPopulateSeries(FunctionFactory &); -void registerFunctionArrayDistance(FunctionFactory &); -void registerFunctionArrayNorm(FunctionFactory &); void registerFunctionsArray(FunctionFactory & factory) { @@ -77,8 +75,6 @@ void registerFunctionsArray(FunctionFactory & factory) registerFunctionArrayAUC(factory); registerFunctionMapOp(factory); registerFunctionMapPopulateSeries(factory); - registerFunctionArrayDistance(factory); - registerFunctionArrayNorm(factory); } } diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 62cfdcd9ee7..2c29db81dd6 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include "Functions/IFunction.h" namespace DB { @@ -1065,6 +1067,120 @@ public: } }; + +/// An adaptor to call Norm/Distance function for tuple or array depending on the 1st argument type +template +class TupleOrArrayFunction : public IFunction +{ +public: + static inline String name = Traits::name; + + explicit TupleOrArrayFunction(ContextPtr context_) + : IFunction() + , tuple_function(Traits::CreateTupleFunction(context_)) + , array_function(Traits::CreateArrayFunction(context_)) {} + + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return tuple_function->getNumberOfArguments(); } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + bool 
is_array = checkDataTypes(arguments[0].type.get()); + return (is_array ? array_function : tuple_function)->getReturnTypeImpl(arguments); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + bool is_array = checkDataTypes(arguments[0].type.get()); + return (is_array ? array_function : tuple_function)->executeImpl(arguments, result_type, input_rows_count); + } + +private: + FunctionPtr tuple_function; + FunctionPtr array_function; +}; + +extern FunctionPtr createFunctionArrayL1Norm(ContextPtr context_); +extern FunctionPtr createFunctionArrayL2Norm(ContextPtr context_); +extern FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_); + +extern FunctionPtr createFunctionArrayL1Distance(ContextPtr context_); +extern FunctionPtr createFunctionArrayL2Distance(ContextPtr context_); +extern FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_); +extern FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_); + +struct L1NormTraits +{ + static inline String name = "L1Norm"; + + static constexpr auto CreateTupleFunction = FunctionL1Norm::create; + static constexpr auto CreateArrayFunction = createFunctionArrayL1Norm; +}; + +struct L2NormTraits +{ + static inline String name = "L2Norm"; + + static constexpr auto CreateTupleFunction = FunctionL2Norm::create; + static constexpr auto CreateArrayFunction = createFunctionArrayL2Norm; +}; + +struct LinfNormTraits +{ + static inline String name = "LinfNorm"; + + static constexpr auto CreateTupleFunction = FunctionLinfNorm::create; + static constexpr auto CreateArrayFunction = createFunctionArrayLinfNorm; +}; + +struct L1DistanceTraits +{ + static inline String name = "L1Distance"; + + static constexpr auto CreateTupleFunction = FunctionL1Distance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayL1Distance; +}; + +struct L2DistanceTraits +{ + static inline String name = "L2Distance"; + 
+ static constexpr auto CreateTupleFunction = FunctionL2Distance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayL2Distance; +}; + +struct LinfDistanceTraits +{ + static inline String name = "LinfDistance"; + + static constexpr auto CreateTupleFunction = FunctionLinfDistance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayLinfDistance; +}; + +struct CosineDistanceTraits +{ + static inline String name = "cosineDistance"; + + static constexpr auto CreateTupleFunction = FunctionCosineDistance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayCosineDistance; +}; + +using TupleOrArrayFunctionL1Norm = TupleOrArrayFunction; +using TupleOrArrayFunctionL2Norm = TupleOrArrayFunction; +using TupleOrArrayFunctionLinfNorm = TupleOrArrayFunction; + +using TupleOrArrayFunctionL1Distance = TupleOrArrayFunction; +using TupleOrArrayFunctionL2Distance = TupleOrArrayFunction; +using TupleOrArrayFunctionLinfDistance = TupleOrArrayFunction; +using TupleOrArrayFunctionCosineDistance = TupleOrArrayFunction; + void registerVectorFunctions(FunctionFactory & factory) { factory.registerFunction(); @@ -1081,19 +1197,19 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerFunction(); factory.registerAlias("scalarProduct", FunctionDotProduct::name, FunctionFactory::CaseInsensitive); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("normL1", FunctionL1Norm::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normL2", FunctionL2Norm::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normLinf", FunctionLinfNorm::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("normL1", TupleOrArrayFunctionL1Norm::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("normL2", 
TupleOrArrayFunctionL2Norm::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("normLinf", TupleOrArrayFunctionLinfNorm::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normLp", FunctionLpNorm::name, FunctionFactory::CaseInsensitive); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerAlias("distanceL1", FunctionL1Distance::name, FunctionFactory::CaseInsensitive); @@ -1111,6 +1227,6 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerAlias("normalizeLinf", FunctionLinfNormalize::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normalizeLp", FunctionLpNormalize::name, FunctionFactory::CaseInsensitive); - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index b6a7f9724c2..3dc8cbd70e6 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -90,9 +90,9 @@ WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT array{norm}Norm(v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT array{norm}Distance(a, v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type}) + SELECT sum(dist) FROM (SELECT {norm}Norm(v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, v) AS dist 
FROM vecs_{element_type}) DROP TABLE vecs_{element_type} DROP TABLE tuples_{element_type} diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql index 89214c57441..246b16daf65 100644 --- a/tests/queries/0_stateless/02282_array_distance.sql +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -1,18 +1,18 @@ -SELECT arrayL1Distance([0, 0, 0], [1, 2, 3]); -SELECT arrayL2Distance([1, 2, 3], [0, 0, 0]); -SELECT arrayLinfDistance([1, 2, 3], [0, 0, 0]); -SELECT arrayCosineDistance([1, 2, 3], [3, 5, 7]); +SELECT L1Distance([0, 0, 0], [1, 2, 3]); +SELECT L2Distance([1, 2, 3], [0, 0, 0]); +SELECT LinfDistance([1, 2, 3], [0, 0, 0]); +SELECT cosineDistance([1, 2, 3], [3, 5, 7]); -SELECT arrayL2Distance([1, 2, 3], NULL); -SELECT arrayCosineDistance([1, 2, 3], [0, 0, 0]); +SELECT L2Distance([1, 2, 3], NULL); +SELECT cosineDistance([1, 2, 3], [0, 0, 0]); -- Overflows WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -573412550, -552499284, 2096941042], 'Array(Int32)') AS a SELECT - arrayL1Distance(a,a), - arrayL2Distance(a,a), - arrayLinfDistance(a,a), - arrayCosineDistance(a, a); + L1Distance(a,a), + L2Distance(a,a), + LinfDistance(a,a), + cosineDistance(a, a); DROP TABLE IF EXISTS vec1; DROP TABLE IF EXISTS vec2; @@ -24,24 +24,24 @@ CREATE TABLE vec2f (id UInt64, v Array(Float32)) ENGINE = Memory; CREATE TABLE vec2d (id UInt64, v Array(Float64)) ENGINE = Memory; INSERT INTO vec1 VALUES (1, [3, 4, 5]), (2, [2, 4, 8]), (3, [7, 7, 7]); -SELECT arrayL1Distance(v, [0, 0, 0]) FROM vec1; -SELECT arrayL2Distance(v, [0, 0, 0]) FROM vec1; -SELECT arrayLinfDistance([5, 4, 3], v) FROM vec1; -SELECT arrayCosineDistance([3, 2, 1], v) FROM vec1; -SELECT arrayLinfDistance(v, materialize([0, -2, 0])) FROM vec1; -SELECT arrayCosineDistance(v, materialize([1., 1., 1.])) FROM vec1; +SELECT L1Distance(v, [0, 0, 0]) FROM vec1; +SELECT L2Distance(v, [0, 0, 0]) FROM vec1; +SELECT LinfDistance([5, 4, 3], v) FROM 
vec1; +SELECT cosineDistance([3, 2, 1], v) FROM vec1; +SELECT LinfDistance(v, materialize([0, -2, 0])) FROM vec1; +SELECT cosineDistance(v, materialize([1., 1., 1.])) FROM vec1; INSERT INTO vec2 VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); -SELECT v1.id, v2.id, arrayL2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2 v2; +SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2 v2; INSERT INTO vec2f VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); -SELECT v1.id, v2.id, arrayL2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2f v2; +SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2f v2; INSERT INTO vec2d VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); -SELECT v1.id, v2.id, arrayL2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2d v2; +SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2d v2; -SELECT arrayL1Distance([0, 0], [1]); -- { serverError 190 } -SELECT arrayL2Distance((1, 2), (3,4)); -- { serverError 43 } +SELECT L1Distance([0, 0], [1]); -- { serverError 190 } +SELECT L2Distance([1, 2], (3,4)); -- { serverError 43 } DROP TABLE vec1; DROP TABLE vec2; diff --git a/tests/queries/0_stateless/02283_array_norm.sql b/tests/queries/0_stateless/02283_array_norm.sql index a759a47d79c..8408eea3f8b 100644 --- a/tests/queries/0_stateless/02283_array_norm.sql +++ b/tests/queries/0_stateless/02283_array_norm.sql @@ -1,13 +1,13 @@ -SELECT arrayL1Norm([1, 2, 3]); -SELECT arrayL2Norm([3., 4., 5.]); -SELECT arrayLinfNorm([0, 0, 2]); +SELECT L1Norm([1, 2, 3]); +SELECT L2Norm([3., 4., 5.]); +SELECT LinfNorm([0, 0, 2]); -- Overflows WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -573412550, -552499284, 2096941042], 'Array(Int32)') AS a SELECT - arrayL1Norm(a), - arrayL2Norm(a), - arrayLinfNorm(a); + L1Norm(a), + L2Norm(a), + LinfNorm(a); DROP TABLE IF EXISTS vec1; DROP TABLE IF EXISTS vec1f; @@ -19,16 +19,16 @@ INSERT INTO vec1 VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); INSERT INTO vec1f 
VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); INSERT INTO vec1d VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); -SELECT id, arrayL2Norm(v) FROM vec1; -SELECT id, arrayL1Norm(materialize([5., 6.])) FROM vec1; +SELECT id, L2Norm(v) FROM vec1; +SELECT id, L1Norm(materialize([5., 6.])) FROM vec1; -SELECT id, arrayL2Norm(v) FROM vec1f; -SELECT id, arrayL1Norm(materialize([5., 6.])) FROM vec1f; +SELECT id, L2Norm(v) FROM vec1f; +SELECT id, L1Norm(materialize([5., 6.])) FROM vec1f; -SELECT id, arrayL2Norm(v) FROM vec1d; -SELECT id, arrayL1Norm(materialize([5., 6.])) FROM vec1d; +SELECT id, L2Norm(v) FROM vec1d; +SELECT id, L1Norm(materialize([5., 6.])) FROM vec1d; -SELECT arrayL1Norm((1, 2,)); -- { serverError 43 } +SELECT L1Norm(1, 2); -- { serverError 42 } DROP TABLE vec1; DROP TABLE vec1f; From b12b363158aed86b5bf2fd4c5b6896888e4df83f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 25 May 2022 22:40:19 +0200 Subject: [PATCH 543/615] Fixed build of hierarchical index for HashedArrayDictionary --- src/Dictionaries/FlatDictionary.cpp | 12 ++++--- src/Dictionaries/HashedArrayDictionary.cpp | 4 +-- src/Dictionaries/HashedDictionary.cpp | 1 + ...ictionary_hierarchical_functions.reference | 35 +++++++++++++++++++ ...rray_dictionary_hierarchical_functions.sql | 34 ++++++++++++++++++ 5 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference create mode 100644 tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 72fa5cfb094..d0d9fba763c 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -255,13 +255,17 @@ DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchicalIndex() const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); HashMap> parent_to_child; + 
parent_to_child.reserve(element_count); - for (size_t i = 0; i < parent_keys.size(); ++i) + UInt64 child_keys_size = static_cast(parent_keys.size()); + + for (UInt64 child_key = 0; child_key < child_keys_size; ++child_key) { - auto parent_key = parent_keys[i]; + if (!loaded_keys[child_key]) + continue; - if (loaded_keys[i]) - parent_to_child[parent_key].emplace_back(static_cast(i)); + auto parent_key = parent_keys[child_key]; + parent_to_child[parent_key].emplace_back(child_key); } return std::make_shared(parent_to_child); diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index fbf6e01f288..66c63b7330d 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -311,8 +311,8 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionarygetMapped(); - auto child_key = parent_keys_container[i]; + auto child_key = it->getMapped(); + auto parent_key = parent_keys_container[i]; parent_to_child[parent_key].emplace_back(child_key); } diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 8e42c6f1d54..81d3d42617b 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -331,6 +331,7 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary & parent_keys = std::get>(hierarchical_attribute.container); HashMap> parent_to_child; + parent_to_child.reserve(parent_keys.size()); for (const auto & [key, value] : parent_keys) parent_to_child[value].emplace_back(key); diff --git a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference new file mode 100644 index 00000000000..7f4ba0901b6 --- /dev/null +++ b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.reference @@ -0,0 +1,35 @@ +Get hierarchy +[] +[1] +[2,1] +[3,1] +[4,2,1] +[] +Get is in 
hierarchy +0 +1 +1 +1 +1 +0 +Get children +[1] +[2,3] +[4] +[] +[] +[] +Get all descendants +[1,2,3,4] +[2,3,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2,3] +[4] +[] +[] +[] diff --git a/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql new file mode 100644 index 00000000000..a775f0e5cbf --- /dev/null +++ b/tests/queries/0_stateless/02311_hashed_array_dictionary_hierarchical_functions.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS hierarchy_source_table; +CREATE TABLE hierarchy_source_table +( + id UInt64, + parent_id UInt64 +) ENGINE = TinyLog; + +INSERT INTO hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2); + +DROP DICTIONARY IF EXISTS hierarchy_hashed_array_dictionary; +CREATE DICTIONARY hierarchy_hashed_array_dictionary +( + id UInt64, + parent_id UInt64 HIERARCHICAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'hierarchy_source_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierarchy_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierarchy_hashed_array_dictionary', number, number) FROM system.numbers LIMIT 6; +SELECT 'Get children'; +SELECT dictGetChildren('hierarchy_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get all descendants'; +SELECT dictGetDescendants('hierarchy_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get descendants at first level'; +SELECT dictGetDescendants('hierarchy_hashed_array_dictionary', number, 1) FROM system.numbers LIMIT 6; + +DROP DICTIONARY hierarchy_hashed_array_dictionary; + +DROP TABLE hierarchy_source_table; From 4410d3d15f2f3c62b5c03fccdd391ba9e40722dc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 25 May 2022 22:49:54 +0200 Subject: [PATCH 544/615] Better test --- 
...emote_filesystem_cache_on_insert.reference | 20 +++++++++++----- ...2241_remote_filesystem_cache_on_insert.sql | 23 ++++++++++++++----- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 783227d5587..9181fce3c90 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -3,6 +3,7 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state FROM @@ -95,6 +96,7 @@ INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); SELECT count() FROM system.filesystem_cache; 21 +SYSTEM START MERGES test OPTIMIZE TABLE test FINAL; SELECT count() FROM system.filesystem_cache; 27 @@ -104,12 +106,18 @@ SELECT count() FROM system.filesystem_cache; 28 INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); SYSTEM FLUSH LOGS; -SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM system.query_log -WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC +SYSTEM FLUSH LOGS; +SELECT + query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM + system.query_log +WHERE + query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' + AND type = 'QueryFinish' + AND current_database = currentDatabase() +ORDER BY + query_start_time + DESC LIMIT 1; SELECT count() 
FROM test; 5010500 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 31d4ca99abb..12875045373 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -79,6 +79,8 @@ FROM WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SYSTEM STOP MERGES test; + SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; @@ -89,6 +91,9 @@ SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); SELECT count() FROM system.filesystem_cache; + +SYSTEM START MERGES test; + OPTIMIZE TABLE test FINAL; SELECT count() FROM system.filesystem_cache; @@ -98,12 +103,18 @@ SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); SYSTEM FLUSH LOGS; -SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM system.query_log -WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC +SELECT + query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM + system.query_log +WHERE + query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' + AND type = 'QueryFinish' + AND current_database = currentDatabase() +ORDER BY + query_start_time + DESC LIMIT 1; + SELECT count() FROM test; SELECT count() FROM test WHERE value LIKE '%010%'; From e1ed2aa3dce2a51040a082611b2115f69b69e938 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 25 May 2022 15:15:11 +0200 Subject: [PATCH 545/615] Add failed builds to the build report --- .github/workflows/backport_branches.yml | 11 +++--- .github/workflows/master.yml | 22 +++++------ .github/workflows/pull_request.yml | 22 +++++------ .github/workflows/release_branches.yml | 11 +++--- tests/ci/build_check.py | 3 +- tests/ci/build_report_check.py | 52 +++++++++++++++++++++---- tests/ci/env_helper.py | 3 +- 7 files changed, 79 insertions(+), 45 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 57474c3d9dd..b93c1b61ffd 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -359,15 +359,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -382,8 +378,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2af54da5e16..c890488ea80 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -970,16 +970,12 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. 
| length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -994,8 +990,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | @@ -1018,15 +1017,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/report_check REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse special build check (actions) + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1041,8 +1036,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 01490dff59e..8c5fa59aabc 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1025,15 +1025,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. 
| length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1048,8 +1044,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | @@ -1073,15 +1072,11 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. | length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/report_check REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=ClickHouse special build check (actions) + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1096,8 +1091,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c16a4a6a568..a7a4b62a494 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -442,16 +442,12 @@ jobs: steps: - name: Set envs run: | - DEPENDENCIES=$(cat << 'EOF' | jq '. 
| length' - ${{ toJSON(needs) }} - EOF - ) - echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV" cat >> "$GITHUB_ENV" << 'EOF' CHECK_NAME=ClickHouse build check (actions) REPORTS_PATH=${{runner.temp}}/reports_dir REPORTS_PATH=${{runner.temp}}/reports_dir TEMP_PATH=${{runner.temp}}/report_check + NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -466,8 +462,11 @@ jobs: run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" + cat > "$NEEDS_DATA_PATH" << 'EOF' + ${{ toJSON(needs) }} + EOF cd "$GITHUB_WORKSPACE/tests/ci" - python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES" + python3 build_report_check.py "$CHECK_NAME" - name: Cleanup if: always() run: | diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f8397bf3e76..9730ac2cc46 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,7 +9,7 @@ import time from shutil import rmtree from typing import List, Optional, Tuple -from env_helper import REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH +from env_helper import GITHUB_JOB, REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH from s3_helper import S3Helper from pr_info import PRInfo from version_helper import ( @@ -138,6 +138,7 @@ def create_json_artifact( "build_config": build_config, "elapsed_seconds": elapsed, "status": success, + "job_name": GITHUB_JOB, } json_name = "build_urls_" + build_name + ".json" diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 44cc45390a5..b2d54eadd60 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -9,11 +9,11 @@ from typing import Dict, List, Tuple from github import Github from env_helper import ( + GITHUB_REPOSITORY, + GITHUB_RUN_URL, + GITHUB_SERVER_URL, REPORTS_PATH, TEMP_PATH, - GITHUB_REPOSITORY, - GITHUB_SERVER_URL, - GITHUB_RUN_URL, ) from report import create_build_html_report from s3_helper import S3Helper @@ -24,6 +24,9 @@ from ci_config import CI_CONFIG from 
rerun_helper import RerunHelper +NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH") + + class BuildResult: def __init__( self, @@ -76,6 +79,23 @@ def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: return groups +def get_failed_report( + job_name: str, +) -> Tuple[List[BuildResult], List[List[str]], List[str]]: + message = f"{job_name} failed" + build_result = BuildResult( + compiler="unknown", + build_type="unknown", + sanitizer="unknown", + bundled="unknown", + splitted="unknown", + status=message, + elapsed_seconds=0, + with_coverage=False, + ) + return [build_result], [[""]], [""] + + def process_report( build_report, ) -> Tuple[List[BuildResult], List[List[str]], List[str]]: @@ -117,15 +137,19 @@ def get_build_name_from_file_name(file_name): def main(): logging.basicConfig(level=logging.INFO) - reports_path = REPORTS_PATH temp_path = TEMP_PATH - logging.info("Reports path %s", reports_path) + logging.info("Reports path %s", REPORTS_PATH) if not os.path.exists(temp_path): os.makedirs(temp_path) build_check_name = sys.argv[1] - required_builds = int(sys.argv[2]) if len(sys.argv) > 2 else 0 + needs_data = None + required_builds = 0 + if os.path.exists(NEEDS_DATA_PATH): + with open(NEEDS_DATA_PATH, "rb") as file_handler: + needs_data = json.load(file_handler) + required_builds = len(needs_data) gh = Github(get_best_robot_token()) pr_info = PRInfo() @@ -140,7 +164,7 @@ def main(): # Collect reports from json artifacts builds_report_map = {} - for root, _, files in os.walk(reports_path): + for root, _, files in os.walk(REPORTS_PATH): for f in files: if f.startswith("build_urls_") and f.endswith(".json"): logging.info("Found build report json %s", f) @@ -163,12 +187,18 @@ def main(): ] some_builds_are_missing = len(build_reports) < required_builds + missing_build_names = [] if some_builds_are_missing: logging.warning( "Expected to get %s build results, got only %s", required_builds, len(build_reports), ) + missing_build_names = [ + name + for name in 
needs_data + if not any(rep for rep in build_reports if rep["job_name"] == name) + ] else: logging.info("Got exactly %s builds", len(builds_report_map)) @@ -186,6 +216,14 @@ def main(): build_artifacts.extend(build_artifacts_url) build_logs.extend(build_logs_url) + for failed_job in missing_build_names: + build_result, build_artifacts_url, build_logs_url = get_failed_report( + failed_job + ) + build_results.extend(build_result) + build_artifacts.extend(build_artifacts_url) + build_logs.extend(build_logs_url) + total_groups = len(build_results) logging.info("Totally got %s artifact groups", total_groups) if total_groups == 0: diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index c97c6298acc..dd081523db1 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -5,11 +5,12 @@ module_dir = p.abspath(p.dirname(__file__)) git_root = p.abspath(p.join(module_dir, "..", "..")) CI = bool(os.getenv("CI")) -TEMP_PATH = os.getenv("TEMP_PATH", module_dir) +TEMP_PATH = os.getenv("TEMP_PATH", p.abspath(p.join(module_dir, "./tmp"))) CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH", "") +GITHUB_JOB = os.getenv("GITHUB_JOB", "local") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") From 8f1aac0ce46cc7818969a27cfc59b2f915523ac1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 26 May 2022 00:44:45 +0200 Subject: [PATCH 546/615] Fix merge with master --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 07c46c20adf..1d7fd9a52ce 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -519,7 +519,7 @@ 
MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); - UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); + UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); if (revision) disk->syncRevision(revision); From f488efd27e5d696834bf6749eb4c0d545576eb81 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 26 May 2022 00:03:31 +0000 Subject: [PATCH 547/615] fix tests --- src/Storages/MergeTree/MergeTreeData.cpp | 56 +++++++++---------- .../02306_part_types_profile_events.sql | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 62c11a31f68..6d72c11c3fc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6763,42 +6763,42 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } -#define FOR_EACH_PART_TYPE(M) \ - M(Wide) \ - M(Compact) \ - M(InMemory) - -#define DECLARE_INCREMENT_EVENT_CASE(Event, Type) \ - case MergeTreeDataPartType::Type: \ - ProfileEvents::increment(ProfileEvents::Event##Type##Parts); \ - break; - -#define DECLARE_INCREMENT_EVENT(value, CASE) \ - switch (value) \ - { \ - FOR_EACH_PART_TYPE(CASE) \ - default: \ - break; \ - } - void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type) { - #define DECLARE_INSERTED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(Inserted, Type) - DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_INSERTED_EVENT_CASE) - #undef DECLARE_INSERTED_EVENT + switch (type.getValue()) + { + case MergeTreeDataPartType::Wide: + ProfileEvents::increment(ProfileEvents::InsertedWideParts); + break; + case MergeTreeDataPartType::Compact: + ProfileEvents::increment(ProfileEvents::InsertedCompactParts); + break; + case MergeTreeDataPartType::InMemory: + 
ProfileEvents::increment(ProfileEvents::InsertedInMemoryParts); + break; + default: + break; + } } void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) { - #define DECLARE_MERGED_EVENT_CASE(Type) DECLARE_INCREMENT_EVENT_CASE(MergedInto, Type) - DECLARE_INCREMENT_EVENT(type.getValue(), DECLARE_MERGED_EVENT_CASE) - #undef DECLARE_MERGED_EVENT + switch (type.getValue()) + { + case MergeTreeDataPartType::Wide: + ProfileEvents::increment(ProfileEvents::MergedIntoWideParts); + break; + case MergeTreeDataPartType::Compact: + ProfileEvents::increment(ProfileEvents::MergedIntoCompactParts); + break; + case MergeTreeDataPartType::InMemory: + ProfileEvents::increment(ProfileEvents::MergedIntoInMemoryParts); + break; + default: + break; + } } -#undef FOR_EACH_PART_TYPE -#undef DECLARE_INCREMENT_EVENT_CASE -#undef DECLARE_INCREMENT_EVENT - CurrentlySubmergingEmergingTagger::~CurrentlySubmergingEmergingTagger() { std::lock_guard lock(storage.currently_submerging_emerging_mutex); diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.sql b/tests/queries/0_stateless/02306_part_types_profile_events.sql index 0ec13bc3827..fd6178941f2 100644 --- a/tests/queries/0_stateless/02306_part_types_profile_events.sql +++ b/tests/queries/0_stateless/02306_part_types_profile_events.sql @@ -24,7 +24,7 @@ SYSTEM STOP MERGES t_parts_profile_events; SYSTEM FLUSH LOGS; SELECT count(), sum(ProfileEvents['InsertedWideParts']), sum(ProfileEvents['InsertedCompactParts']) - FROM system.query_log WHERE has(databases, currentDatabase()) + FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02306_part_types_profile_events' AND query ILIKE 'INSERT INTO%' AND type = 'QueryFinish'; From 417296481e4d4d05fcc034ba7292ab48de1604b9 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 26 May 2022 04:39:02 +0200 Subject: [PATCH 548/615] fix root CMakeLists.txt search --- utils/c++expr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/utils/c++expr b/utils/c++expr index c498e780d05..c70a4c7d382 100755 --- a/utils/c++expr +++ b/utils/c++expr @@ -98,7 +98,7 @@ find_clickhouse_root () { echo "error: $DIR has no CMakeLists.txt" return 1 fi - if grep "project(ClickHouse)" "$DIR/CMakeLists.txt" >/dev/null 2>&1; then + if grep "project(ClickHouse" "$DIR/CMakeLists.txt" >/dev/null 2>&1; then echo $DIR return 0 fi From abe6b5d0133c00c1f672f4d5bf5ae2acefd18e26 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 26 May 2022 10:09:27 +0300 Subject: [PATCH 549/615] Reverted unnecessary modification --- src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index eee046206ba..7dee7b8d0f8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -156,7 +156,7 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) index_granularity_bytes = 0; if (checkString("constraints: ", in)) - in >> " " >> constraints >> "\n"; + in >> constraints >> "\n"; } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) From 822ecd982f42d6e7c63b9ad91dc8cddd1ce62b48 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:36:05 +0800 Subject: [PATCH 550/615] better & support clean stash --- src/Common/FileCache.cpp | 15 ++++++++++++--- src/Common/FileCache.h | 2 ++ src/Common/FileSegment.cpp | 3 +++ src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 5 ++++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index c2af2e07099..6c76bf5c0b3 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -408,7 +408,7 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( auto skip_or_download = [&]() 
-> FileSegmentPtr { - if (state == FileSegment::State::EMPTY) + if (state == FileSegment::State::EMPTY && enable_cache_hits_threshold) { auto record = records.find({key, offset}); @@ -425,8 +425,7 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( } /// For segments that do not reach the download threshold, we do not download them, but directly read them - state = queue_iter->hits >= enable_cache_hits_threshold ? FileSegment::State::EMPTY : FileSegment::State::SKIP_CACHE; - return std::make_shared(offset, size, key, this, state); + return std::make_shared(offset, size, key, this, FileSegment::State::SKIP_CACHE); } else { @@ -673,6 +672,10 @@ void LRUFileCache::remove() } } } + + /// Remove all access information. + records.clear(); + stash_queue.removeAll(cache_lock); } void LRUFileCache::remove( @@ -955,6 +958,12 @@ void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard & /* cache_lock */) +{ + queue.clear(); + cache_size = 0; +} + void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard & /* cache_lock */) { queue.splice(queue.end(), queue, queue_it); diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index a367d47885b..f66287b805f 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -196,6 +196,8 @@ private: Iterator end() { return queue.end(); } + void removeAll(std::lock_guard & cache_lock); + private: std::list queue; size_t cache_size = 0; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index aee3d470f44..27a111c1297 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -532,7 +532,10 @@ void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); if (is_last_holder && download_state == State::SKIP_CACHE) + { cache->remove(key(), offset(), cache_lock, segment_lock); + return; + } if (download_state == State::SKIP_CACHE || is_detached) return; diff 
--git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 1cb6354d38c..0c0d14c247d 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -491,7 +491,10 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() /// Do not hold pointer to file segment if it is not needed anymore /// so can become releasable and can be evicted from cache. - file_segments_holder->file_segments.erase(file_segment_it); + /// If the status of filesegment state is SKIP_CACHE, it will not be deleted. + /// It will be deleted from the cache when the holder is destructed. + if ((*file_segment_it)->state() != FileSegment::State::SKIP_CACHE) + file_segments_holder->file_segments.erase(file_segment_it); if (current_file_segment_it == file_segments_holder->file_segments.end()) return false; From f5a186c774685c0315b6834fa2e04658a14af909 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:36:26 +0800 Subject: [PATCH 551/615] add test for local cache download --- tests/config/config.d/storage_conf.xml | 20 ++++++++++++++++++- ...0_system_remote_filesystem_cache.reference | 18 +++++++++++++++++ .../02240_system_remote_filesystem_cache.sql | 17 ++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 45fad002c88..90a679e4cdd 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -9,7 +9,6 @@ 1 0 22548578304 - 1 ./s3_cache/ @@ -22,6 +21,18 @@ 22548578304 0 + + s3 + http://localhost:11111/test/00170_test/ + clickhouse + clickhouse + 1 + 0 + 22548578304 + 1 + ./s3_cache/ + 1 + @@ -38,6 +49,13 @@ + + +
+ s3_cache_3 +
+
+
diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index 8bcb7e1dd42..d50f0847104 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -2,6 +2,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; +set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -17,3 +18,20 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy 0 745 746 SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +set enable_cache_hits_threshold=1; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 0 1 +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 0 1 +0 745 746 +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git 
a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index aa469779130..5e3991a44c0 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -4,6 +4,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; +set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -16,3 +17,19 @@ SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; + +set enable_cache_hits_threshold=1; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; \ No newline at end of file From 
ab4614dee5c4fabb29ea1e5c9b6a251e057b3373 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:39:50 +0800 Subject: [PATCH 552/615] fix --- tests/config/config.d/storage_conf.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 90a679e4cdd..2637f80f75f 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -9,6 +9,7 @@ 1 0 22548578304 + 1 ./s3_cache/ From 824628c0dace2a51ea3c7c8d1b6499a36200e6d7 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 16:51:16 +0800 Subject: [PATCH 553/615] fix style --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 0c0d14c247d..1e63dab165e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -491,7 +491,7 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() /// Do not hold pointer to file segment if it is not needed anymore /// so can become releasable and can be evicted from cache. - /// If the status of filesegment state is SKIP_CACHE, it will not be deleted. + /// If the status of filesegment state is SKIP_CACHE, it will not be deleted. /// It will be deleted from the cache when the holder is destructed. 
if ((*file_segment_it)->state() != FileSegment::State::SKIP_CACHE) file_segments_holder->file_segments.erase(file_segment_it); From 286c13317ffc90f979084b04995c65c9aedc75d9 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 17:38:42 +0800 Subject: [PATCH 554/615] fix stateless test --- .../0_stateless/02240_system_remote_filesystem_cache.reference | 2 -- .../0_stateless/02240_system_remote_filesystem_cache.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index d50f0847104..cfe1ad411cb 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -2,7 +2,6 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; -set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -18,7 +17,6 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy 0 745 746 SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -set enable_cache_hits_threshold=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index 5e3991a44c0..60a8eba8f3e 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ 
b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -4,7 +4,6 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; -set enable_cache_hits_threshold=0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -18,7 +17,6 @@ SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesy SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -set enable_cache_hits_threshold=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); From dc9ca3d70c9233c23e4f0e52dd4152f7f0023e97 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 26 May 2022 15:14:58 +0300 Subject: [PATCH 555/615] Fix LOGICAL_ERROR in getMaxSourcePartsSizeForMerge during merges (#37413) --- programs/server/Server.cpp | 7 +++++-- .../MergeTree/MergeTreeBackgroundExecutor.cpp | 6 ++++++ src/Storages/MergeTree/MergeTreeBackgroundExecutor.h | 3 ++- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- .../MergeTree/MergeTreeDataMergerMutator.cpp | 7 +++++-- src/Storages/MergeTree/MergeTreeSettings.cpp | 12 +++++------- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/StorageMergeTree.cpp | 4 +--- src/Storages/StorageReplicatedMergeTree.cpp | 4 +--- 9 files changed, 28 insertions(+), 21 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4a964b81694..75deeeb2eb5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1395,8 +1395,11 @@ int Server::main(const std::vector & /*args*/) fs::create_directories(format_schema_path); /// 
Check sanity of MergeTreeSettings on server startup - global_context->getMergeTreeSettings().sanityCheck(settings); - global_context->getReplicatedMergeTreeSettings().sanityCheck(settings); + { + size_t background_pool_tasks = global_context->getMergeMutateExecutor()->getMaxTasksCount(); + global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks); + global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks); + } /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 3f82e60e82f..3399d46f0bb 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -62,6 +62,12 @@ void MergeTreeBackgroundExecutor::increaseThreadsAndMaxTasksCount(size_t threads_count = new_threads_count; } +template +size_t MergeTreeBackgroundExecutor::getMaxTasksCount() const +{ + std::lock_guard lock(mutex); + return max_tasks_count; +} template bool MergeTreeBackgroundExecutor::trySchedule(ExecutableTaskPtr task) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index bf3e7dc8bd8..de5af5199c0 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -192,6 +192,7 @@ public: /// Supports only increasing the number of threads and tasks, because /// implementing tasks eviction will definitely be too error-prone and buggy. 
void increaseThreadsAndMaxTasksCount(size_t new_threads_count, size_t new_max_tasks_count); + size_t getMaxTasksCount() const; bool trySchedule(ExecutableTaskPtr task); void removeTasksCorrespondingToStorage(StorageID id); @@ -209,7 +210,7 @@ private: /// Initially it will be empty Queue pending{}; boost::circular_buffer active{0}; - std::mutex mutex; + mutable std::mutex mutex; std::condition_variable has_tasks; std::atomic_bool shutdown{false}; ThreadPool pool; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d2c757f6750..be1c9ffd370 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -226,7 +226,7 @@ MergeTreeData::MergeTreeData( /// Check sanity of MergeTreeSettings. Only when table is created. if (!attach) - settings->sanityCheck(getContext()->getSettingsRef()); + settings->sanityCheck(getContext()->getMergeMutateExecutor()->getMaxTasksCount()); MergeTreeDataFormatVersion min_format_version(0); if (!date_column_name.empty()) @@ -2569,7 +2569,7 @@ void MergeTreeData::changeSettings( /// Reset to default settings before applying existing. 
auto copy = getDefaultSettings(); copy->applyChanges(new_changes); - copy->sanityCheck(getContext()->getSettingsRef()); + copy->sanityCheck(getContext()->getMergeMutateExecutor()->getMaxTasksCount()); storage_settings.set(std::move(copy)); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 99e5ef3ee21..c2c23793580 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -83,8 +83,11 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge() const UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t max_count, size_t scheduled_tasks_count) const { if (scheduled_tasks_count > max_count) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: invalid argument passed to \ - getMaxSourcePartsSize: scheduled_tasks_count = {} > max_count = {}", scheduled_tasks_count, max_count); + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Logical error: invalid argument passed to getMaxSourcePartsSize: scheduled_tasks_count = {} > max_count = {}", + scheduled_tasks_count, max_count); + } size_t free_entries = max_count - scheduled_tasks_count; const auto data_settings = data.getSettings(); diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index c1cc3b6ed3c..5b78a59687b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -73,10 +73,9 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) #undef ADD_IF_ABSENT } -void MergeTreeSettings::sanityCheck(const Settings & query_settings) const +void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const { - if (number_of_free_entries_in_pool_to_execute_mutation > - query_settings.background_pool_size * 
query_settings.background_merges_mutations_concurrency_ratio) + if (number_of_free_entries_in_pool_to_execute_mutation > background_pool_tasks) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'number_of_free_entries_in_pool_to_execute_mutation' setting" " ({}) (default values are defined in section of config.xml" @@ -85,11 +84,10 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const " ({}) (the value is defined in users.xml for default profile)." " This indicates incorrect configuration because mutations cannot work with these settings.", number_of_free_entries_in_pool_to_execute_mutation, - query_settings.background_pool_size * query_settings.background_merges_mutations_concurrency_ratio); + background_pool_tasks); } - if (number_of_free_entries_in_pool_to_lower_max_size_of_merge > - query_settings.background_pool_size * query_settings.background_merges_mutations_concurrency_ratio) + if (number_of_free_entries_in_pool_to_lower_max_size_of_merge > background_pool_tasks) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'number_of_free_entries_in_pool_to_lower_max_size_of_merge' setting" " ({}) (default values are defined in section of config.xml" @@ -98,7 +96,7 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const " ({}) (the value is defined in users.xml for default profile)." " This indicates incorrect configuration because the maximum size of merge will be always lowered.", number_of_free_entries_in_pool_to_lower_max_size_of_merge, - query_settings.background_pool_size * query_settings.background_merges_mutations_concurrency_ratio); + background_pool_tasks); } // The min_index_granularity_bytes value is 1024 b and index_granularity_bytes is 10 mb by default. 
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 0d984dc4dee..0b5d09803d7 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -183,7 +183,7 @@ struct MergeTreeSettings : public BaseSettings } /// Check that the values are sane taking also query-level settings into account. - void sanityCheck(const Settings & query_settings) const; + void sanityCheck(size_t background_pool_tasks) const; }; using MergeTreeSettingsPtr = std::shared_ptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b685162f826..f1da4542e86 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -100,9 +100,7 @@ StorageMergeTree::StorageMergeTree( attach) , reader(*this) , writer(*this) - , merger_mutator(*this, - getContext()->getSettingsRef().background_merges_mutations_concurrency_ratio * - getContext()->getSettingsRef().background_pool_size) + , merger_mutator(*this, getContext()->getMergeMutateExecutor()->getMaxTasksCount()) { loadDataParts(has_force_restore_data_flag); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index feadca23921..0e0767e5cb4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -263,9 +263,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) , writer(*this) - , merger_mutator(*this, - getContext()->getSettingsRef().background_merges_mutations_concurrency_ratio * - getContext()->getSettingsRef().background_pool_size) + , merger_mutator(*this, getContext()->getMergeMutateExecutor()->getMaxTasksCount()) , merge_strategy_picker(*this) , queue(*this, merge_strategy_picker) , fetcher(*this) From e8f33fb0d9997918b7d9f0b8401834f5d03800c1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: 
Wed, 25 May 2022 22:20:13 +0200 Subject: [PATCH 556/615] fix flaky tests --- src/Access/Common/AccessType.h | 1 + src/Interpreters/InterpreterSystemQuery.cpp | 16 ++++++++++++++++ src/Interpreters/InterpreterSystemQuery.h | 2 ++ src/Interpreters/TransactionLog.cpp | 10 ++++++++++ src/Interpreters/TransactionLog.h | 2 ++ src/Parsers/ASTSystemQuery.h | 1 + src/Storages/StorageMergeTree.cpp | 6 +++++- .../01169_alter_partition_isolation_stress.sh | 1 + .../0_stateless/01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 +++--- 10 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 8c10fd7e150..d5233951403 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -159,6 +159,7 @@ enum class AccessType M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \ M(SYSTEM_SYNC_DATABASE_REPLICA, "SYNC DATABASE REPLICA", DATABASE, SYSTEM) \ + M(SYSTEM_SYNC_TRANSACTION_LOG, "SYNC TRANSACTION LOG", GLOBAL, SYSTEM) \ M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \ M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \ M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \ diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 0db6f353cf4..8b5c6ff2923 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -442,6 +443,9 @@ BlockIO InterpreterSystemQuery::execute() case Type::SYNC_DATABASE_REPLICA: syncReplicatedDatabase(query); break; + case Type::SYNC_TRANSACTION_LOG: + syncTransactionLog(); + break; case Type::FLUSH_DISTRIBUTED: flushDistributed(query); break; @@ -763,6 +767,13 @@ void InterpreterSystemQuery::syncReplicatedDatabase(ASTSystemQuery & query) } +void 
InterpreterSystemQuery::syncTransactionLog() +{ + getContext()->checkTransactionsAreAllowed(/* explicit_tcl_query */ true); + TransactionLog::instance().sync(); +} + + void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_DISTRIBUTED, table_id); @@ -937,6 +948,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_SYNC_DATABASE_REPLICA, query.getDatabase()); break; } + case Type::SYNC_TRANSACTION_LOG: + { + required_access.emplace_back(AccessType::SYSTEM_SYNC_TRANSACTION_LOG); + break; + } case Type::FLUSH_DISTRIBUTED: { required_access.emplace_back(AccessType::SYSTEM_FLUSH_DISTRIBUTED, query.getDatabase(), query.getTable()); diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index b5a80f0e19c..f07f18489f0 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -53,6 +53,8 @@ private: void syncReplicatedDatabase(ASTSystemQuery & query); + void syncTransactionLog(); + void restoreReplica(); void dropReplica(ASTSystemQuery & query); diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 4f0e79297b8..c5aaaa8eab3 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -597,4 +597,14 @@ TransactionLog::TransactionsList TransactionLog::getTransactionsList() const return running_list; } + +void TransactionLog::sync() const +{ + Strings entries_list = zookeeper->getChildren(zookeeper_path_log); + chassert(!entries_list.empty()); + std::sort(entries_list.begin(), entries_list.end()); + CSN newest_csn = deserializeCSN(entries_list.back()); + waitForCSNLoaded(newest_csn); +} + } diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h index a0268ce9b88..49aa77b9868 100644 --- a/src/Interpreters/TransactionLog.h +++ 
b/src/Interpreters/TransactionLog.h @@ -126,6 +126,8 @@ public: bool isShuttingDown() const { return stop_flag.load(); } + void sync() const; + private: void loadLogFromZooKeeper(); void runUpdatingThread(); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index eff71a3a9a0..27973691d9d 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -37,6 +37,7 @@ public: DROP_REPLICA, SYNC_REPLICA, SYNC_DATABASE_REPLICA, + SYNC_TRANSACTION_LOG, RELOAD_DICTIONARY, RELOAD_DICTIONARIES, RELOAD_MODEL, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b685162f826..d4860195112 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -191,7 +191,11 @@ void StorageMergeTree::shutdown() /// parts which will remove themselves in their destructors. If so, we /// may have race condition between our remove call and background /// process. - clearOldPartsFromFilesystem(true); + /// Do not clear old parts in case when server is shutting down because it failed to start due to some exception. + + if (Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::SERVER + && Context::getGlobalContextInstance()->isServerCompletelyStarted()) + clearOldPartsFromFilesystem(true); } catch (...) 
{ diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index 9fef47c8d5d..653333dcb96 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -73,6 +73,7 @@ function thread_partition_dst_to_src() $CLICKHOUSE_CLIENT --multiquery --query " SYSTEM STOP MERGES dst; ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick + SYSTEM SYNC TRANSACTION LOG; BEGIN TRANSACTION; INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4); INSERT INTO src SELECT * FROM dst; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c50a911e371..b410595cf1d 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -113,6 +113,7 @@ SYSTEM SYNC REPLICA ['SYNC REPLICA'] TABLE SYSTEM SYSTEM RESTART REPLICA ['RESTART REPLICA'] TABLE SYSTEM SYSTEM RESTORE REPLICA ['RESTORE REPLICA'] TABLE SYSTEM SYSTEM SYNC DATABASE REPLICA ['SYNC DATABASE REPLICA'] DATABASE SYSTEM +SYSTEM SYNC TRANSACTION LOG ['SYNC TRANSACTION LOG'] GLOBAL SYSTEM SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index d4ada9ba5c8..6b9ca26949b 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -276,7 +276,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW 
COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 
'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM FLUSH DISTRIBUTED' = 115, 'SYSTEM FLUSH LOGS' = 116, 'SYSTEM FLUSH' = 117, 'SYSTEM THREAD FUZZER' = 118, 'SYSTEM' = 119, 'dictGet' = 120, 'addressToLine' = 121, 'addressToLineWithInlines' = 122, 'addressToSymbol' = 123, 'demangle' = 124, 'INTROSPECTION' = 125, 'FILE' = 126, 'URL' = 127, 'REMOTE' = 128, 'MONGO' = 129, 'MEILISEARCH' = 130, 'MYSQL' = 131, 'POSTGRES' = 132, 'SQLITE' = 133, 'ODBC' = 134, 'JDBC' = 135, 'HDFS' = 136, 'S3' = 137, 'HIVE' = 138, 'SOURCES' = 139, 'CLUSTER' = 140, 'ALL' = 141, 'NONE' = 142), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 
'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 
'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM' = 120, 'dictGet' = 121, 'addressToLine' = 122, 'addressToLineWithInlines' = 123, 'addressToSymbol' = 124, 'demangle' = 125, 'INTROSPECTION' = 126, 'FILE' = 127, 'URL' = 128, 'REMOTE' = 129, 'MONGO' = 130, 'MEILISEARCH' = 131, 'MYSQL' = 132, 'POSTGRES' = 133, 'SQLITE' = 134, 'ODBC' = 135, 'JDBC' = 136, 'HDFS' = 137, 'S3' = 138, 'HIVE' = 139, 'SOURCES' = 140, 'CLUSTER' = 141, 'ALL' = 142, 'NONE' = 143), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -550,10 +550,10 @@ ENGINE = SystemPartsColumns() COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 
'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM FLUSH DISTRIBUTED' = 115, 'SYSTEM FLUSH LOGS' = 116, 'SYSTEM FLUSH' = 117, 'SYSTEM THREAD FUZZER' = 118, 'SYSTEM' = 119, 'dictGet' = 120, 'addressToLine' = 121, 'addressToLineWithInlines' = 122, 'addressToSymbol' = 123, 'demangle' = 124, 'INTROSPECTION' = 125, 'FILE' = 126, 'URL' = 127, 'REMOTE' = 128, 'MONGO' = 129, 'MEILISEARCH' = 130, 'MYSQL' = 131, 'POSTGRES' = 132, 'SQLITE' = 133, 'ODBC' = 134, 'JDBC' = 135, 'HDFS' = 136, 'S3' = 137, 'HIVE' = 138, 'SOURCES' = 139, 'CLUSTER' = 140, 'ALL' = 141, 'NONE' = 142), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD 
INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED 
DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM' = 120, 'dictGet' = 121, 'addressToLine' = 122, 'addressToLineWithInlines' = 123, 'addressToSymbol' = 124, 'demangle' = 125, 'INTROSPECTION' = 126, 'FILE' = 127, 'URL' = 128, 'REMOTE' = 129, 'MONGO' = 130, 'MEILISEARCH' = 131, 'MYSQL' = 132, 'POSTGRES' = 133, 'SQLITE' = 134, 'ODBC' = 135, 'JDBC' = 136, 'HDFS' = 137, 'S3' = 138, 'HIVE' = 139, 'SOURCES' = 140, 'CLUSTER' = 141, 'ALL' = 142, 'NONE' = 143), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER 
CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC 
REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM FLUSH DISTRIBUTED' = 115, 'SYSTEM FLUSH LOGS' = 116, 'SYSTEM FLUSH' = 117, 'SYSTEM THREAD FUZZER' = 118, 'SYSTEM' = 119, 'dictGet' = 120, 'addressToLine' = 121, 'addressToLineWithInlines' = 122, 'addressToSymbol' = 123, 'demangle' = 124, 'INTROSPECTION' = 125, 'FILE' = 126, 'URL' = 127, 'REMOTE' = 128, 'MONGO' = 129, 'MEILISEARCH' = 130, 'MYSQL' = 131, 'POSTGRES' = 132, 'SQLITE' = 133, 'ODBC' = 134, 'JDBC' = 135, 'HDFS' = 136, 'S3' = 137, 'HIVE' = 138, 'SOURCES' = 139, 'CLUSTER' = 140, 'ALL' = 141, 'NONE' = 142)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP 
DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM' = 120, 'dictGet' = 121, 'addressToLine' = 122, 'addressToLineWithInlines' = 123, 'addressToSymbol' = 124, 'demangle' = 125, 'INTROSPECTION' = 126, 'FILE' = 127, 'URL' = 128, 'REMOTE' = 129, 'MONGO' = 130, 'MEILISEARCH' = 131, 'MYSQL' = 
132, 'POSTGRES' = 133, 'SQLITE' = 134, 'ODBC' = 135, 'JDBC' = 136, 'HDFS' = 137, 'S3' = 138, 'HIVE' = 139, 'SOURCES' = 140, 'CLUSTER' = 141, 'ALL' = 142, 'NONE' = 143)) ) ENGINE = SystemPrivileges() COMMENT 'SYSTEM TABLE is built on the fly.' From 55379f0d03a2a5ac091a0de037ddb097776fd982 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 26 May 2022 20:59:31 +0800 Subject: [PATCH 557/615] fix config --- tests/config/config.d/storage_conf.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 2637f80f75f..b22225163f8 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -31,7 +31,6 @@ 0 22548578304 1 - ./s3_cache/ 1
From 8ae277a1c4a76869616168fb0dc71d06313af311 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 26 May 2022 15:25:48 +0200 Subject: [PATCH 558/615] Fix test --- .../02241_remote_filesystem_cache_on_insert.reference | 3 +-- .../0_stateless/02241_remote_filesystem_cache_on_insert.sql | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 9181fce3c90..3627978dfbc 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -96,7 +96,7 @@ INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); SELECT count() FROM system.filesystem_cache; 21 -SYSTEM START MERGES test +SYSTEM START MERGES test; OPTIMIZE TABLE test FINAL; SELECT count() FROM system.filesystem_cache; 27 @@ -106,7 +106,6 @@ SELECT count() FROM system.filesystem_cache; 28 INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); SYSTEM FLUSH LOGS; -SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read FROM diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 12875045373..8b3f6a9c0b1 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -6,9 +6,8 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; - +SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; - SELECT file_segment_range_begin, 
file_segment_range_end, size, state FROM ( @@ -79,8 +78,6 @@ FROM WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; -SYSTEM STOP MERGES test; - SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; From c862f89b8d384227adf8bc4f4c3ba19b8a52179a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 26 May 2022 15:43:21 +0200 Subject: [PATCH 559/615] Fix tidy --- src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp index b09debf9a43..a36c987db7d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadataHelper.cpp @@ -303,7 +303,7 @@ static std::tuple extractRevisionAndOperationFromKey(const Strin String suffix; String operation; /// Key has format: ../../r{revision}(-{hostname})-{operation} - static const re2::RE2 key_regexp{".*/r(\\d+)(-[\\w\\d\\-\\.]+)?-(\\w+)$"}; + static const re2::RE2 key_regexp{R"(.*/r(\d+)(-[\w\d\-\.]+)?-(\w+)$)"}; re2::RE2::FullMatch(key, key_regexp, &revision_str, &suffix, &operation); From 36af6b1fa8486456d5f183b374518310eca48bde Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 26 May 2022 16:15:02 +0200 Subject: [PATCH 560/615] Fix assertion --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 33 +------------------ 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 1cb6354d38c..e323d1a4f0e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -212,38 +212,6 @@ SeekableReadBufferPtr 
CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; return getRemoteFSReadBuffer(file_segment, read_type); } - case FileSegment::State::EMPTY: - { - auto downloader_id = file_segment->getOrSetDownloader(); - if (downloader_id == file_segment->getCallerId()) - { - if (file_offset_of_buffer_end == file_segment->getDownloadOffset()) - { - read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; - return getRemoteFSReadBuffer(file_segment, read_type); - } - else - { - /// segment{k} - /// cache: [______|___________ - /// ^ - /// download_offset - /// requested_range: [__________] - /// ^ - /// file_offset_of_buffer_end - assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); - bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); - - read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; - return getRemoteFSReadBuffer(file_segment, read_type); - } - } - else - { - download_state = file_segment->state(); - continue; - } - } case FileSegment::State::DOWNLOADING: { size_t download_offset = file_segment->getDownloadOffset(); @@ -280,6 +248,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( read_type = ReadType::CACHED; return getCacheReadBuffer(range.left); } + case FileSegment::State::EMPTY: case FileSegment::State::PARTIALLY_DOWNLOADED: { if (file_segment->getDownloadOffset() > file_offset_of_buffer_end) From c6c60364ae33c644a9aefedb5ad634db006d1550 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 26 May 2022 21:20:27 +0300 Subject: [PATCH 561/615] Remove unused MergeTreeDataMergerMutator::chooseMergeAlgorithm() In favor of MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm() Signed-off-by: Azat Khuzhin --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 40 ------------------- .../MergeTree/MergeTreeDataMergerMutator.h | 9 ----- 2 files changed, 49 deletions(-) diff --git 
a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c2c23793580..f596828ed05 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -535,46 +535,6 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( } -MergeAlgorithm MergeTreeDataMergerMutator::chooseMergeAlgorithm( - const MergeTreeData::DataPartsVector & parts, - size_t sum_rows_upper_bound, - const NamesAndTypesList & gathering_columns, - bool deduplicate, - bool need_remove_expired_values, - const MergeTreeData::MergingParams & merging_params) const -{ - const auto data_settings = data.getSettings(); - - if (deduplicate) - return MergeAlgorithm::Horizontal; - if (data_settings->enable_vertical_merge_algorithm == 0) - return MergeAlgorithm::Horizontal; - if (need_remove_expired_values) - return MergeAlgorithm::Horizontal; - - for (const auto & part : parts) - if (!part->supportsVerticalMerge()) - return MergeAlgorithm::Horizontal; - - bool is_supported_storage = - merging_params.mode == MergeTreeData::MergingParams::Ordinary || - merging_params.mode == MergeTreeData::MergingParams::Collapsing || - merging_params.mode == MergeTreeData::MergingParams::Replacing || - merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing; - - bool enough_ordinary_cols = gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; - - bool enough_total_rows = sum_rows_upper_bound >= data_settings->vertical_merge_algorithm_min_rows_to_activate; - - bool no_parts_overflow = parts.size() <= RowSourcePart::MAX_PARTS; - - auto merge_alg = (is_supported_storage && enough_total_rows && enough_ordinary_cols && no_parts_overflow) ? 
- MergeAlgorithm::Vertical : MergeAlgorithm::Horizontal; - - return merge_alg; -} - - MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index f99f3d2e70d..a5f99c63f11 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -154,15 +154,6 @@ public : ActionBlocker ttl_merges_blocker; private: - - MergeAlgorithm chooseMergeAlgorithm( - const MergeTreeData::DataPartsVector & parts, - size_t rows_upper_bound, - const NamesAndTypesList & gathering_columns, - bool deduplicate, - bool need_remove_expired_values, - const MergeTreeData::MergingParams & merging_params) const; - MergeTreeData & data; const size_t max_tasks_count; From 3074be8d17b99188baa3c721c5dd1277e3036f09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 26 May 2022 22:19:15 +0200 Subject: [PATCH 562/615] Add security generator --- utils/security-generator/SECURITY.md.sh | 80 +++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 utils/security-generator/SECURITY.md.sh diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh new file mode 100755 index 00000000000..c8b8840b07d --- /dev/null +++ b/utils/security-generator/SECURITY.md.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# This is a script to automate the SECURITY.md generation in the repository root. +# The logic is the following: +# We support the latest ClickHouse Y.M stable release, +# the two releases before the latest stable, +# and the two latest LTS releases (which may be already included by the criteria above). +# The LTS releases are every Y.3 and Y.8 stable release. 
+ +echo " +# Security Policy + +## Security Announcements +Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). + +## Scope and Supported Versions + +The following versions of ClickHouse server are currently being supported with security updates: +" + +clickhouse-local --query " +SELECT + concat(CAST(y, 'String'), '.', if(y < ((toYear(today()) - 2000) - 1), '*', CAST(m, 'String'))) AS Version, + if((n <= 3) OR (is_lts AND (lts_n <= 2)), '✔️', 'x') AS Supported +FROM +( + SELECT + y, + m, + count() OVER (ORDER BY y DESC, m DESC) AS n, + m IN (3, 8) AS is_lts, + countIf(is_lts) OVER (ORDER BY y DESC, m DESC) AS lts_n + FROM + ( + WITH + extractGroups(version, 'v(\\d+).(\\d+)') AS v, + CAST(v[1], 'UInt8') AS y, + CAST(v[2], 'UInt8') AS m + SELECT + y, + m + FROM file('$(dirname "${BASH_SOURCE[0]}")/../list-versions/version_date.tsv', TSV, 'version String, date String') + ORDER BY + y DESC, + m DESC + LIMIT 1 BY + y, + m + ) +) +LIMIT 1 BY Version +FORMAT Markdown" + +echo " +## Reporting a Vulnerability + +We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. + +To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). + +### When Should I Report a Vulnerability? + +- You think you discovered a potential security vulnerability in ClickHouse +- You are unsure how a vulnerability affects ClickHouse + +### When Should I NOT Report a Vulnerability? + +- You need help tuning ClickHouse components for security +- You need help applying security related updates +- Your issue is not security related + +## Security Vulnerability Response + +Each report is acknowledged and analyzed by ClickHouse maintainers within 5 working days. 
+As the security issue moves from triage, to identified fix, to release planning we will keep the reporter updated. + +## Public Disclosure Timing + +A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect report date to disclosure date to be on the order of 7 days. +" From 359e36f42179704257ae0e6d5533e2d5124f39f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 26 May 2022 22:21:49 +0200 Subject: [PATCH 563/615] Readability --- utils/security-generator/SECURITY.md.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh index c8b8840b07d..71a7275c2ca 100755 --- a/utils/security-generator/SECURITY.md.sh +++ b/utils/security-generator/SECURITY.md.sh @@ -20,8 +20,8 @@ The following versions of ClickHouse server are currently being supported with s clickhouse-local --query " SELECT - concat(CAST(y, 'String'), '.', if(y < ((toYear(today()) - 2000) - 1), '*', CAST(m, 'String'))) AS Version, - if((n <= 3) OR (is_lts AND (lts_n <= 2)), '✔️', 'x') AS Supported + y::String || '.' || (y < toYear(today()) - 2000 - 1 ? '*' : m::String) AS Version + (n <= 3 OR (is_lts AND lts_n <= 2)) ? 
'✔️' : 'x' AS Supported FROM ( SELECT @@ -34,8 +34,8 @@ FROM ( WITH extractGroups(version, 'v(\\d+).(\\d+)') AS v, - CAST(v[1], 'UInt8') AS y, - CAST(v[2], 'UInt8') AS m + v[1]::UInt8 AS y, + v[2]::UInt8 AS m SELECT y, m From 434d8729dec29b9ce600e631949596b05657263c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 26 May 2022 22:22:14 +0200 Subject: [PATCH 564/615] Readability --- utils/security-generator/SECURITY.md.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh index 71a7275c2ca..fbb22b4a2ce 100755 --- a/utils/security-generator/SECURITY.md.sh +++ b/utils/security-generator/SECURITY.md.sh @@ -20,7 +20,7 @@ The following versions of ClickHouse server are currently being supported with s clickhouse-local --query " SELECT - y::String || '.' || (y < toYear(today()) - 2000 - 1 ? '*' : m::String) AS Version + y::String || '.' || (y < toYear(today()) - 2000 - 1 ? '*' : m::String) AS Version, (n <= 3 OR (is_lts AND lts_n <= 2)) ? '✔️' : 'x' AS Supported FROM ( From aeacfa0d7ecfc59af7bf9d4958245fd373f81e45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 26 May 2022 22:23:37 +0200 Subject: [PATCH 565/615] Readability --- utils/security-generator/SECURITY.md.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh index fbb22b4a2ce..97c696c1227 100755 --- a/utils/security-generator/SECURITY.md.sh +++ b/utils/security-generator/SECURITY.md.sh @@ -21,7 +21,7 @@ The following versions of ClickHouse server are currently being supported with s clickhouse-local --query " SELECT y::String || '.' || (y < toYear(today()) - 2000 - 1 ? '*' : m::String) AS Version, - (n <= 3 OR (is_lts AND lts_n <= 2)) ? '✔️' : 'x' AS Supported + (n <= 3 OR (is_lts AND lts_n <= 2)) ? 
'✔️' : '❌' AS Supported FROM ( SELECT From 48ec7ceddb0a5f2d419ec8722212f52147b5fbef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 00:39:16 +0200 Subject: [PATCH 566/615] Remove useless files --- website/blog/README.md | 51 --- ...on-of-data-structures-in-yandex-metrica.md | 107 ----- .../2016/how-to-update-data-in-clickhouse.md | 169 -------- .../en/2016/yandex-opensources-clickhouse.md | 13 - .../en/2017/clickhouse-at-data-scale-2017.md | 11 - .../2017/clickhouse-at-percona-live-2017.md | 22 - ...ckhouse-meetup-in-berlin-october-5-2017.md | 10 - ...khouse-meetup-in-santa-clara-may-4-2017.md | 8 - .../join-the-clickhouse-meetup-in-berlin.md | 13 - ...ouse-meetup-in-amsterdam-on-november-15.md | 8 - .../2018/clickhouse-at-analysys-a10-2018.md | 27 -- .../clickhouse-at-percona-live-europe-2018.md | 25 -- ...ty-meetup-in-beijing-on-january-27-2018.md | 68 ---- ...ty-meetup-in-beijing-on-october-28-2018.md | 54 --- ...mmunity-meetup-in-berlin-on-july-3-2018.md | 39 -- ...se-community-meetup-in-berlin-on-july-3.md | 8 - ...unity-meetup-in-paris-on-october-2-2018.md | 20 - ...meetup-in-amsterdam-on-november-15-2018.md | 27 -- .../en/2018/concept-cloud-mergetree-tables.md | 120 ------ .../2019/clickhouse-at-percona-live-2019.md | 38 -- ...nese-academy-of-science-on-june-11-2019.md | 17 - ...khouse-meetup-in-beijing-on-june-8-2019.md | 35 -- ...khouse-meetup-in-limassol-on-may-7-2019.md | 41 -- ...khouse-meetup-in-madrid-on-april-2-2019.md | 28 -- ...-meetup-in-san-francisco-on-june-4-2019.md | 10 - ...peed-up-lz4-decompression-in-clickhouse.md | 13 - ...of-clickhouse-meetups-in-china-for-2019.md | 14 - .../five-methods-for-database-obfuscation.md | 11 - .../en/2020/package-repository-behind-cdn.md | 71 ---- website/blog/en/2020/pixel-benchmark.md | 84 ---- .../blog/en/2020/the-clickhouse-community.md | 138 ------- website/blog/en/2021/clickhouse-inc.md | 51 --- .../2021/clickhouse-october-moscow-meetup.md | 42 -- 
.../2021/clickhouse-raises-250m-series-b.md | 15 - .../en/2021/clickhouse-v21.10-released.md | 29 -- .../en/2021/clickhouse-v21.11-released.md | 63 --- .../en/2021/clickhouse-v21.12-released.md | 285 ------------- website/blog/en/2021/code-review.md | 83 ---- website/blog/en/2021/fuzzing-clickhouse.md | 58 --- ...ve-capabilities-in-clickhouse-databases.md | 16 - website/blog/en/2021/performance-test-1.md | 84 ---- ...y-friendly-google-analytics-alternative.md | 37 -- .../en/2021/reading-from-external-memory.md | 69 ---- website/blog/en/2021/tests-visualization.md | 45 --- ...ion-unique-users-a-day-using-clickhouse.md | 133 ------ .../blog/en/2022/clickhouse-v22.1-released.md | 248 ------------ .../blog/en/2022/clickhouse-v22.2-released.md | 90 ----- ...of-financial-data-a-day-with-clickhouse.md | 75 ---- website/blog/en/index.md | 3 - website/blog/en/redirects.txt | 33 -- website/careers/index.html | 32 -- website/company/index.html | 35 -- website/legal/trademark-policy/index.html | 31 -- website/locale/en/LC_MESSAGES/messages.mo | Bin 6243 -> 0 bytes website/locale/en/LC_MESSAGES/messages.po | 379 ------------------ website/locale/ja/LC_MESSAGES/messages.mo | Bin 424 -> 0 bytes website/locale/ja/LC_MESSAGES/messages.po | 326 --------------- website/locale/messages.pot | 328 --------------- website/locale/ru/LC_MESSAGES/messages.mo | Bin 505 -> 0 bytes website/locale/ru/LC_MESSAGES/messages.po | 327 --------------- website/locale/zh/LC_MESSAGES/messages.mo | Bin 424 -> 0 bytes website/locale/zh/LC_MESSAGES/messages.po | 325 --------------- website/support/agreement/index.html | 27 -- website/support/case/index.html | 27 -- website/support/policy/index.html | 27 -- website/thank-you/index.html | 25 -- 66 files changed, 4648 deletions(-) delete mode 100644 website/blog/README.md delete mode 100644 website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md delete mode 100644 website/blog/en/2016/how-to-update-data-in-clickhouse.md delete mode 100644 
website/blog/en/2016/yandex-opensources-clickhouse.md delete mode 100644 website/blog/en/2017/clickhouse-at-data-scale-2017.md delete mode 100644 website/blog/en/2017/clickhouse-at-percona-live-2017.md delete mode 100644 website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md delete mode 100644 website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md delete mode 100644 website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md delete mode 100644 website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md delete mode 100644 website/blog/en/2018/clickhouse-at-analysys-a10-2018.md delete mode 100644 website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md delete mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md delete mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md delete mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md delete mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md delete mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md delete mode 100644 website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md delete mode 100644 website/blog/en/2018/concept-cloud-mergetree-tables.md delete mode 100644 website/blog/en/2019/clickhouse-at-percona-live-2019.md delete mode 100644 website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md delete mode 100644 website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md delete mode 100644 website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md delete mode 100644 website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md delete mode 100644 website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md delete mode 100644 
website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md delete mode 100644 website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md delete mode 100644 website/blog/en/2020/five-methods-for-database-obfuscation.md delete mode 100644 website/blog/en/2020/package-repository-behind-cdn.md delete mode 100644 website/blog/en/2020/pixel-benchmark.md delete mode 100644 website/blog/en/2020/the-clickhouse-community.md delete mode 100644 website/blog/en/2021/clickhouse-inc.md delete mode 100644 website/blog/en/2021/clickhouse-october-moscow-meetup.md delete mode 100644 website/blog/en/2021/clickhouse-raises-250m-series-b.md delete mode 100644 website/blog/en/2021/clickhouse-v21.10-released.md delete mode 100644 website/blog/en/2021/clickhouse-v21.11-released.md delete mode 100644 website/blog/en/2021/clickhouse-v21.12-released.md delete mode 100644 website/blog/en/2021/code-review.md delete mode 100644 website/blog/en/2021/fuzzing-clickhouse.md delete mode 100644 website/blog/en/2021/how-to-enable-predictive-capabilities-in-clickhouse-databases.md delete mode 100644 website/blog/en/2021/performance-test-1.md delete mode 100644 website/blog/en/2021/plausible-uses-clickHouse-to-power-privacy-friendly-google-analytics-alternative.md delete mode 100644 website/blog/en/2021/reading-from-external-memory.md delete mode 100644 website/blog/en/2021/tests-visualization.md delete mode 100644 website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md delete mode 100644 website/blog/en/2022/clickhouse-v22.1-released.md delete mode 100644 website/blog/en/2022/clickhouse-v22.2-released.md delete mode 100644 website/blog/en/2022/opensee-analyzing-terabytes-of-financial-data-a-day-with-clickhouse.md delete mode 100644 website/blog/en/index.md delete mode 100644 website/blog/en/redirects.txt delete mode 100644 website/careers/index.html delete mode 100644 website/company/index.html delete mode 100644 
website/legal/trademark-policy/index.html delete mode 100644 website/locale/en/LC_MESSAGES/messages.mo delete mode 100644 website/locale/en/LC_MESSAGES/messages.po delete mode 100644 website/locale/ja/LC_MESSAGES/messages.mo delete mode 100644 website/locale/ja/LC_MESSAGES/messages.po delete mode 100644 website/locale/messages.pot delete mode 100644 website/locale/ru/LC_MESSAGES/messages.mo delete mode 100644 website/locale/ru/LC_MESSAGES/messages.po delete mode 100644 website/locale/zh/LC_MESSAGES/messages.mo delete mode 100644 website/locale/zh/LC_MESSAGES/messages.po delete mode 100644 website/support/agreement/index.html delete mode 100644 website/support/case/index.html delete mode 100644 website/support/policy/index.html delete mode 100644 website/thank-you/index.html diff --git a/website/blog/README.md b/website/blog/README.md deleted file mode 100644 index 2c29eda459b..00000000000 --- a/website/blog/README.md +++ /dev/null @@ -1,51 +0,0 @@ -## Introduction - -First of all, **relevant guest posts are welcome**! Especially with success stories or demonstration of ClickHouse ecosystem projects. - -The ClickHouse blog is published alongside documentation and the rest of official website. So the posts reside in this same repository in [Markdown](https://github.com/ClickHouse/ClickHouse/tree/master/docs#markdown-cheatsheet) format. - -## How To Add a New Post? - -Basically you need to create a new Markdown file at the following location inside repository `/website/blog///.md` and then [open a pull-request](https://github.com/ClickHouse/ClickHouse/compare) with it. You can do it even right from the GitHub web interface using the "Create new file" button. - -Each post needs to have a `yaml` meta-header with the following fields: - -- Required: - - `title`, main name of the article. In Title Case for English. - - `date`, publication date in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format, like `YYYY-MM-DD` (can be in future to postpone publication). 
-- Optional: - - `image`, URL to main post image. - - `tags`, list of post tags. - -Then after header goes post content in a normal markdown (with some optional extensions). - -The recommended place to store images is this GitHub repo: . It's folder structure matches this folder with blog posts: - -- `///main.jpg` for main post image (linked in `image` header field). -- `///whatever.jpg` for other images (`png` or `gif` are acceptable as well, if necessary). - -### Example - ```markdown ---- -title: 'ClickHouse Meetup in Beijing on June 8, 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/main.jpg' -date: '2019-06-13' -tags: ['meetup','Beijing','China','events'] ---- - -24th ClickHouse Meetup globally and 3rd one in China took place in Beijing on Dragon Boat Festival weekend, which appeared to... - -![ClickHouse branded Beijing duck](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/9.jpg) -``` - -## How To Preview My Post? - -Use [deploy-to-test.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/deploy-to-test.sh) script. Note that on the first use you'll need to follow the steps in its first comment, and [install prerequisites for build.py](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/README.md#use-buildpy-use-build-py). Alternatively, you can use `--livereload=N` argument of [build.py](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/build.py). - -## How To Add a New Blog Language? - -If you want to write a guest post, you are welcome to use your native language or make multiple posts in multiple languages - -Unlike documentation, blog languages are independent, i.e. they have partially overlapping sets of posts and it's ok. Most posts are written only in one language because they are not relevant to audiences of other languages. 
- -At the moment it's not so straightforward to set up a new language for blog and it won't be documented for now, but you can just create a language directory with the first post as described above and we'll configure the website infrastructure to include it during/after merging the pull-request. diff --git a/website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md b/website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md deleted file mode 100644 index 3e717e467c1..00000000000 --- a/website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: 'Evolution of Data Structures in Yandex.Metrica' -image: 'https://blog-images.clickhouse.com/en/2016/evolution-of-data-structures-in-yandex-metrica/main.jpg' -date: '2016-12-13' -tags: ['Yandex.Metrica', 'data structures', 'LSM tree', 'columnar storage'] -author: 'Alexey Milovidov' ---- - -[Yandex.Metrica](https://metrica.yandex.com/) takes in a stream of data representing events that took place on sites or on apps. Our task is to keep this data and present it in an analyzable form. The real challenge lies in trying to determine what form the processed results should be saved in so that they are easy to work with. During the development process, we had to completely change our approach to data storage organization several times. We started with MyISAM tables, then used LSM-trees and eventually came up with column-oriented database, ClickHouse. - -At its founding, Metrica was designed as an offshoot of Yandex.Direct, the search ads service. MySQL tables with MyISAM engine were used in Direct to store statistics and it was natural to use same approach in Metrica. Initially Yandex.Metrica for websites had more than 40 “fixed” report types (for example, the visitor geography report), several in-page analytics tools (like click maps), Webvisor (tool to study individual user actions in great detail), as well as the separate report constructor. 
But with time to keep up with business goals the system had to become more flexible and provide more customization opportunities for customers. Nowadays instead of using fixed reports Metrica allows to freely add new dimensions (for example, in a keyword report you can break data down further by landing page), segment and compare (between, let's say, traffic sources for all visitors vs. visitors from Moscow), change your set of metrics, etc. These features demanded a completely different approach to data storage than what we used with MyISAM, we will further discuss this transition from technical perspective. - -## MyISAM - -Most SELECT queries that fetch data for reports are made with the conditions WHERE CounterID = AND Date BETWEEN min_date AND max_date. Sometimes there is also filter by region, so it made sense to use complex primary key to turn this into primary key range is read. So table schema for Metrica looks like this: CounterID, Date, RegionID -> Visits, SumVisitTime, etc. Now we'll take a look at what happens when it comes in. - -A MyISAM table is comprised of a data file and an index file. If nothing was deleted from the table and the rows did not change in length during updating, the data file will consist of serialized rows arranged in succession in the order that they were added. The index (including the primary key) is a B-tree, where the leaves contain offsets in the data file. When we read index range data, a lot of offsets in the data file are taken from the index. Then reads are issued for this set of offsets in the data file. - -Let's look at the real-life situation when the index is in RAM (key cache in MySQL or system page cache), but the table data is not cached. Let's assume that we are using HDDs. The time it takes to read data depends on the volume of data that needs to be read and how many Seek operations need to be run. The number of Seek's is determined by the locality of data on the disk. 
- -Data locality illustrated: -![Data locality](https://blog-images.clickhouse.com/en/2016/evolution-of-data-structures-in-yandex-metrica/1.jpg) - -Metrica events are received in almost the same order in which they actually took place. In this incoming stream, data from different counters is scattered completely at random. In other words, incoming data is local by time, but not local by CounterID. When writing to a MyISAM table, data from different counters is also placed quite randomly. This means that to read the data report, you will need to perform about as many random reads as there are rows that we need in the table. - -A typical 7200 rpm hard disk can perform 100 to 200 random reads per second. A RAID, if used properly, can handle the same amount multiplied by number of disks in it. One five-year-old SSD can perform 30,000 random reads per second, but we cannot afford to keep our data on SSD. So in this case, if we needed to read 10,000 rows for a report, it would take more than 10 seconds, which would be totally unacceptable. - -InnoDB is much better suited to reading primary key ranges since it uses [a clustered primary key](https://en.wikipedia.org/wiki/Database_index#Clustered) (i.e., the data is stored in an orderly manner on the primary key). But InnoDB was impossible to use due to its slow write speed. If this reminds you of [TokuDB](https://www.percona.com/software/mysql-database/percona-tokudb), then read on. - -It took a lot of tricks like periodic table sorting, complicated manual partitioning schemes, and keeping data in generations to keep Yandex.Metrica working on MyISAM. This approach also had a lot of operational drawbacks, for example slow replication, consistency, unreliable recovery, etc. Nevertheless, as of 2011, we stored more than 580 billion rows in MyISAM tables. 
- -## Metrage and OLAPServer - -Metrage is an implementation of [LSM Tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree), a fairly common data structure that works well for workloads with intensive stream of writes and mostly primary key reads, like Yandex.Metrica has. LevelDB did not exist in 2010 and TokuDB was proprietary at the time. - -![LSM Tree](https://blog-images.clickhouse.com/en/2016/evolution-of-data-structures-in-yandex-metrica/2.jpg) - -In Metrage arbitrary data structures (fixed at compile time) can be used as “rows” in it. Every row is a key, value pair. A key is a structure with comparison operations for equality and inequality. The value is an arbitrary structure with operations to update (to add something) and merge (to aggregate or combine with another value). In short, it's a CRDT. Data is located pretty locally on the hard disk, so the primary key range reads are quick. Blocks of data are effectively compressed even with fast algorithms because of ordering (in 2010 we used QuickLZ, since 2011 - LZ4). Storing data in a systematic manner enables us to use a sparse index. - -Since reading is not performed very often (even though lot of rows are read when it does) the increase in latency due to having many chunks and decompressing the data block does not matter. Reading extra rows because of the index sparsity also does not make a difference. - -After transferring reports from MyISAM to Metrage, we immediately saw an increase in Metrica interface speed. Whereas earlier the 90% of page-title reports loaded in 26 seconds, with Metrage they loaded in 0.8 seconds (total time, including time to process all database queries and follow-up data transformations). The time it takes Metrage itself to process queries (for all reports) is as follows according to percent: average = 6 ms, 90tile = 31 ms, 99tile = 334 ms. - -We've been using Metrage for five years and it has proved to be a reliable solution. 
As of 2015 we stored 3.37 trillion rows in Metrage and used 39 * 2 servers for this. - -Its advantages were simplicity and effectiveness, which made it a far better choice for storing data than MyISAM. Though the system still had one huge drawback: it really only works effectively with fixed reports. Metrage aggregates data and saves aggregated data. But in order to do this, you have to list all the ways in which you want to aggregate data ahead of time. So if we do this in 40 different ways, it means that Metrica will contain 40 types of reports and no more. - -To mitigate this we had to keep for a while a separate storage for custom report wizard, called OLAPServer. It is a simple and very limited implementation of a column-oriented database. It supports only one table set in compile time — a session table. Unlike Metrage, data is not updated in real-time, but rather a few times per day. The only data type supported is fixed-length numbers of 1-8 bytes, so it wasn't suitable for reports with other kinds of data, for example URLs. - -## ClickHouse - -Using OLAPServer, we developed an understanding of how well column-oriented DBMS's handle ad-hoc analytics tasks with non-aggregated data. If you can retrieve any report from non-aggregated data, then it begs the question of whether data even needs to be aggregated in advance, as we did with Metrage. - -![](https://blog-images.clickhouse.com/en/2016/evolution-of-data-structures-in-yandex-metrica/3.gif) - -On the one hand, pre-aggregating data can reduce the volume of data that is used at the moment when the report page is loading. On the other hand, though, aggregated data doesn't solve everything. 
Here are the reasons why: - -- you need to have a list of reports that your users need ahead of time; in other words, the user can't put together a custom report -- when aggregating a lot of keys, the amount of data is not reduced and aggregation is useless; when there are a lot of reports, there are too many aggregation options (combinatorial explosion) -- when aggregating high cardinality keys (for example, URLs) the amount of data does not decrease by much (by less than half) -due to this, the amount of data may not be reduced, but actually grow during aggregation -- users won't view all the reports that we calculate for them (in other words, a lot of the calculations prove useless) -- it's difficult to maintain logical consistency when storing a large number of different aggregations - -As you can see, if nothing is aggregated and we work with non-aggregated data, then it's possible that the volume of computations will even be reduced. But only working with non-aggregated data imposes very high demands on the effectiveness of the system that executes the queries. - -So if we aggregate the data in advance, then we should do it constantly (in real time), but asynchronously with respect to user queries. We should really just aggregate the data in real time; a large portion of the report being received should consist of prepared data. - -If data is not aggregated in advance, all the work has to be done at the moment the user requests it (i.e. while they wait for the report page to load). This means that many billions of rows need to be processed in response to the user's query; the quicker this can be done, the better. - -For this you need a good column-oriented DBMS. The market didn't have any column-oriented DBMS's that would handle internet-analytics tasks on the scale of Runet (the Russian internet) well enough and would not be prohibitively expensive to license. 
- -Recently, as an alternative to commercial column-oriented DBMS's, solutions for efficient ad-hoc analytics of data in distributed computing systems began appearing: Cloudera Impala, Spark SQL, Presto, and Apache Drill. Although such systems can work effectively with queries for internal analytical tasks, it is difficult to imagine them as the backend for the web interface of an analytical system accessible to external users. - -At Yandex, we developed and later opensourced our own column-oriented DBMS — ClickHouse. Let's review the basic requirements that we had in mind before we proceeded to development. - -**Ability to work with large datasets.** In current Yandex.Metrica for websites, ClickHouse is used to store all data for reports. As of November, 2016, the database is comprised of 18.3 trillion rows. It's made up of non-aggregated data that is used to retrieve reports in real-time. Every row in the largest table contains over 200 columns. - -**The system should scale linearly.** ClickHouse allows you to increase the size of cluster by adding new servers as needed. For example, Yandex.Metrica's main cluster has increased from 60 to 426 servers in three years. In the aim of fault tolerance, our servers are spread across different data centers. ClickHouse can use all hardware resources to process a single query. This way more than 2 terabytes can be processed per second. - -**High efficiency.** We especially pride ourselves on our database's high performance. Based on the results of internal tests, ClickHouse processes queries faster than any other system we could acquire. For example, ClickHouse works an average of 2.8-3.4 times faster than Vertica. With ClickHouse there is no one silver bullet that makes the system work so quickly. - -**Functionality should be sufficient for Web analytics tools.** The database supports the SQL language dialect, subqueries and JOINs (local and distributed). 
There are numerous SQL extensions: functions for web analytics, arrays and nested data structures, higher-order functions, aggregate functions for approximate calculations using sketching, etc. By working with ClickHouse, you get the convenience of a relational DBMS. - -ClickHouse was initially developed by the Yandex.Metrica team. Furthermore, we were able to make the system flexible and extensible enough that it can be successfully used for different tasks. Although the database can run on large clusters, it can be installed on one server or even on a virtual machine. There are now more than a dozen different ClickHouse applications within our company. - -ClickHouse is well equipped for creating all kinds of analytical tools. Just consider: if the system can handle the challenges of Yandex.Metrica, you can be sure that ClickHouse will cope with other tasks with a lot of performance headroom to spare. - -ClickHouse works well as a time series database; at Yandex it is commonly used as the backend for Graphite instead of Ceres/Whisper. This lets us work with more than a trillion metrics on a single server. - -ClickHouse is used by analytics for internal tasks. Based on our experience at Yandex, ClickHouse performs at about three orders of magnitude higher than traditional methods of data processing (scripts on MapReduce). But this is not a simple quantitative difference. The fact of the matter is that by having such a high calculation speed, you can afford to employ radically different methods of problem solving. - -If an analyst has to make a report and they are competent at their job, they won't just go ahead and construct one report. Rather, they will start by retrieving dozens of other reports to better understand the nature of the data and test various hypotheses. It is often useful to look at data from different angles in order to posit and check new hypotheses, even if you don't have a clear goal. 
- -This is only possible if the data analysis speed allows you to conduct online research. The faster queries are executed, the more hypotheses you can test. Working with ClickHouse, one even gets the sense that they are able to think faster. - -In traditional systems, data is like a dead weight, figuratively speaking. You can manipulate it, but it takes a lot of time and is inconvenient. If your data is in ClickHouse though, it is much more malleable: you can study it in different cross-sections and drill down to the individual rows of data. - -## Conclusions - -Yandex.Metrica has become the second largest web-analytics system in the world. The volume of data that Metrica takes in grew from 200 million events a day in 2009 to more than 25 billion in 2016. In order to provide users with a wide variety of options while still keeping up with the increasing workload, we've had to constantly modify our approach to data storage. - -Effective hardware utilization is very important to us. In our experience, when you have a large volume of data, it's better not to worry as much about how well the system scales and instead focus on how effectively each unit of resource is used: each processor core, disk and SSD, RAM, and network. After all, if your system is already using hundreds of servers, and you have to work ten times more efficiently, it is unlikely that you can just proceed to install thousands of servers, no matter how scalable your system is. - -To maximize efficiency, it's important to customize your solution to meet the needs of specific type of workload. There is no data structure that copes well with completely different scenarios. For example, it's clear that key-value databases don't work for analytical queries. The greater the load on the system, the narrower the specialization required. One should not be afraid to use completely different data structures for different tasks. 
- -We were able to set things up so that Yandex.Metrica's hardware was relatively inexpensive. This has allowed us to offer the service free of charge to even very large sites and mobile apps, even larger than Yandex's own, while competitors typically start asking for a paid subscription plan. diff --git a/website/blog/en/2016/how-to-update-data-in-clickhouse.md b/website/blog/en/2016/how-to-update-data-in-clickhouse.md deleted file mode 100644 index 951fb957d13..00000000000 --- a/website/blog/en/2016/how-to-update-data-in-clickhouse.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -title: 'How to Update Data in ClickHouse' -date: '2016-11-20' -image: 'https://blog-images.clickhouse.com/en/2016/how-to-update-data-in-clickhouse/main.jpg' -tags: ['features', 'update', 'delete', 'CollapsingMergeTree', 'partitions'] ---- - -There are no UPDATE or DELETE commands in ClickHouse at the moment. And that's not because we have some religious beliefs. ClickHouse is performance-oriented system; and data modifications are hard to store and process optimally in terms of performance. - -But sometimes we have to modify data. And sometimes data should be updated in realtime. Don't worry, we have these cases covered. - -## Work with Partitions - -Data in MergeTree engine family is partitioned by partition_key engine parameter. MergeTree split all the data by this partition key. Partition size is one month. - -That's very useful in many terms. Especially when we're talking about data modification. - -## Yandex.Metrica "hits" Table - -Let's look at an example on Yandex.Metrica server mtlog02-01-1 which store some Yandex.Metrica data for year 2013. Table we are looking at contains user events we call “hits”. 
This is the engine description for hits table: - -``` text -ENGINE = ReplicatedMergeTree( - '/clickhouse/tables/{layer}-{shard}/hits', -- zookeeper path - '{replica}', -- settings in config describing replicas - EventDate, -- partition key column - intHash32(UserID), -- sampling key - (CounterID, EventDate, intHash32(UserID), WatchID), -- index - 8192 -- index granularity -) -``` - -You can see that the partition key column is EventDate. That means that all the data will be splitted by months using this column. - -With this SQL we can get partitions list and some stats about current partitions: - -```sql -SELECT - partition, - count() as number_of_parts, - formatReadableSize(sum(bytes)) as sum_size -FROM system.parts -WHERE - active - AND database = 'merge' - AND table = 'hits' -GROUP BY partition -ORDER BY partition; -``` -```text -┌─partition─┬─number_of_parts─┬─sum_size───┐ -│ 201306 │ 1 │ 191.34 GiB │ -│ 201307 │ 4 │ 537.86 GiB │ -│ 201308 │ 6 │ 608.77 GiB │ -│ 201309 │ 5 │ 658.68 GiB │ -│ 201310 │ 5 │ 768.74 GiB │ -│ 201311 │ 5 │ 654.61 GiB │ -└───────────┴─────────────────┴────────────┘ -``` -There are 6 partitions with a few parts in each of them. Each partition is around 600 Gb of data. Partition is strictly one piece of data for partition key, here we can see that it is months. Part is one piece of data inside partition. Basically it's one node of LSMT structure, so there are not so many of them, especially for old data. If there are too many of them, they merge and form bigger ones. - -## Partition Operations - -There is a nice set of operations to work with partitions: - -- `DETACH PARTITION` - Move a partition to the 'detached' directory and forget it. -- `DROP PARTITION` - Delete a partition. -- `ATTACH PART|PARTITION` -- Add a new part or partition from the 'detached' directory to the table. -- `FREEZE PARTITION` - Create a backup of a partition. -- `FETCH PARTITION` - Download a partition from another server. 
- -We can do any data management operations on partitions level: move, copy and delete. Also, special DETACH and ATTACH operations are created to simplify data manipulation. DETACH detaches partition from table, moving all data to detached directory. Data is still there and you can copy it anywhere but detached data is not visible on request level. ATTACH is the opposite: attaches data from detached directory so it become visible. - -This attach-detach commands works almost in no time so you can make your updates almost transparently to database clients. - -Here is the plan how to update data using partitions: - -- Create modified partition with updated data on another table -- Copy data for this partition to detached directory -- `DROP PARTITION` in main table -- `ATTACH PARTITION` in main table - -Partition swap especially useful for huge data updates with low frequency. But they're not so handy when you need to update a lot of data in real time. - -## Update Data on the Fly - -In Yandex.Metrica we have user sessions table. Each row is one session on a website: some pages checked, some time spent, some banners clicked. This data is updated every second: user on a website view more pages, click more buttons, and do other things. Site owner can see that actions in Yandex.Metrica interface in real time. - -So how do we do that? - -We update data not by updating that data, but adding more data about what have changed. This is usually called CRDT approach, and there is an article on Wikipedia about that. - -It was created to solve conflict problem in transactions but this concept also allows updating data. We use our own data model with this approach. We call it Incremental Log. - -## Incremental Log - -Let's look at an example. - -Here we have one session information with user identifier UserID, number of page viewed PageViews, time spent on site in seconds Duration. There is also Sign field, we describe it later. 
-``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐ -│ 4324182021466249494 │ 5 │ 146 │ 1 │ -└─────────────────────┴───────────┴──────────┴──────┘ -``` -And let's say we calculate some metrics over this data. - -- `count()`- number of sessions -- `sum(PageViews)`- total number of pages all users checked -- `avg(Duration)` - average session duration, how long user usually spent on the website - -Let's say now we have update on that: user checked one more page, so we should change PageViews from 5 to 6 and Duration from 146 to 185. - -We insert two more rows: -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐ -│ 4324182021466249494 │ 5 │ 146 │ -1 │ -│ 4324182021466249494 │ 6 │ 185 │ 1 │ -└─────────────────────┴───────────┴──────────┴──────┘ -``` - -First one is delete row. It's exactly the same row what we already have there but with Sign set to -1. Second one is updated row with all data set to new values. - -After that we have three rows of data: -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐ -│ 4324182021466249494 │ 5 │ 146 │ 1 │ -│ 4324182021466249494 │ 5 │ 146 │ -1 │ -│ 4324182021466249494 │ 6 │ 185 │ 1 │ -└─────────────────────┴───────────┴──────────┴──────┘ -``` - -The most important part is modified metrics calculation. We should update our queries like this: - -``` text - -- number of sessions -count() -> sum(Sign) - -- total number of pages all users checked -sum(PageViews) -> sum(Sign * PageViews) - -- average session duration, how long user usually spent on the website -avg(Duration) -> sum(Sign * Duration) / sum(Sign) -``` - -You can see that it works as expected over this data. Deleted row 'hide' old row, same values come with + and - signs inside aggregation and annihilate each other. - -Moreover, it works totally fine with changing keys for grouping. If we want to group data by PageViews, all data for PageView = 5 will be 'hidden' for this rows. 
- -There are some limitations with this approach: - -- It works only for metrics which can be presented through this Sign operations. It covers most cases, but it's not possible to calculate min or max values. There is an impact to uniq calculations also. But it's fine at least for Yandex.Metrica cases, and there are a lot of different analytical calculations; -- You need to remember somehow old value in external system doing updates, so you can insert this 'delete' rows; -- Some other effects; there is a [great answer](https://groups.google.com/forum/#!msg/clickhouse/VixyOUD-K68/Km8EpkCyAQAJ) on Google Groups. - -## CollapsingMergeTree - -ClickHouse has support of Incremental Log model in Collapsing engines family. - -If you use Collapsing family, 'delete' row and old 'deleted' rows will collapse during merge process. Merge is a background process of merging data into larger chunks. Here is a great article about merges and LSMT structures. - -For most cases 'delete' and 'deleted' rows will be removed in terms of days. What's important here is that you will not have any significant overhead on data size. Using Sign field on selects still required. - -Also there is FINAL modifier available over Collapsing family. Using FINAL guarantees that user will see already collapsing data, thus using Sign field isn't required. FINAL usually make tremendous performance degradation because ClickHouse have to group data by key and delete rows during SELECT execution. But it's useful when you want to check your queries or if you want to see raw, unaggregated data in their final form. - -## Future Plans - -We know that current feature set is not enough. There are some cases which do not fit to limitations. But we have huge plans, and here are some insights what we've preparing: - -- Partitions by custom key: current partitioning scheme is binded to months only. We will remove this limitation and it will be possible to create partitions by any key. 
All partition operations like FETCH PARTITION will be available. -- UPDATE and DELETE: there are a lot of issues with updates and deletes support. Performance degradation, consistency guarantees, distributed queries and more. But we believe that if you need to update few rows of data in your dataset, it should not be painful. It will be done. - diff --git a/website/blog/en/2016/yandex-opensources-clickhouse.md b/website/blog/en/2016/yandex-opensources-clickhouse.md deleted file mode 100644 index 16618824e5e..00000000000 --- a/website/blog/en/2016/yandex-opensources-clickhouse.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: 'Yandex Opensources ClickHouse' -image: 'https://blog-images.clickhouse.com/en/2016/yandex-opensources-clickhouse/main.jpg' -date: '2016-06-15' -tags: ['announcement', 'GitHub', 'license'] -author: 'Alexey Milovidov' ---- - -Today [analytical DBMS ClickHouse](https://clickhouse.com/) initially developed internally at Yandex, became available to everyone. Source code is published on [GitHub](https://github.com/ClickHouse/ClickHouse) under Apache 2.0 license. - -ClickHouse allows interactive analytical query execution on data updated in real time. System is able to scale to tens of trillions of rows and petabytes of stored data. Using ClickHouse opens up opportunities that were hard to imagine: you can store full stream of data and slice and dice it to produce reports without offline aggregation. ClickHouse was initially developed as a backend for [Yandex.Metrica](https://metrika.yandex.com/) — second largest web analytics system in the world. - -[Discussion on Hacker News](https://news.ycombinator.com/item?id=11908254). 
diff --git a/website/blog/en/2017/clickhouse-at-data-scale-2017.md b/website/blog/en/2017/clickhouse-at-data-scale-2017.md deleted file mode 100644 index e288d6e455d..00000000000 --- a/website/blog/en/2017/clickhouse-at-data-scale-2017.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: 'ClickHouse at Data@Scale 2017' -image: 'https://blog-images.clickhouse.com/en/2017/clickhouse-at-data-scale-2017/main.jpg' -date: '2017-06-15' -tags: ['conference', 'Seattle', 'USA', 'America', 'events'] -author: 'Alexey Milovidov' ---- - -![iframe](https://www.youtube.com/embed/bSyQahMVZ7w) - -[Slides](https://presentations.clickhouse.com/data_at_scale/) diff --git a/website/blog/en/2017/clickhouse-at-percona-live-2017.md b/website/blog/en/2017/clickhouse-at-percona-live-2017.md deleted file mode 100644 index 12966ffa726..00000000000 --- a/website/blog/en/2017/clickhouse-at-percona-live-2017.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: 'ClickHouse at Percona Live 2017' -image: 'https://blog-images.clickhouse.com/en/2017/clickhouse-at-percona-live-2017/main.jpg' -date: '2017-04-28' -tags: ['meetup', 'Santa Clara', 'Bay Area', 'California', 'USA', 'America', 'events', 'Graphouse'] ---- - -For those who haven't heard, [Percona Live](https://percona.com/live/17) is probably one of the largest international conferences about opensource database management systems, having 12 talk tracks in parallel. It's been around for many years and initially, it was focused mainly on MySQL (and had that in its name), but nowadays it is more generic and other products of this category get lots of attention too. Needless to say that for a relatively new player on the market like [ClickHouse](https://clickhouse.com/), it's been a great opportunity to spread the word about the technology and how exactly it allows us to perform analytics on petabytes of data in real-time. 
- -![Percona Live](https://blog-images.clickhouse.com/en/2017/clickhouse-at-percona-live-2017/1.jpg) - -Yandex team members had three chances to talk about ClickHouse from the stage: - -1. A large portion of [Opening Keynote](https://www.percona.com/blog/2017/04/25/percona-live-2017-day-one-keynotes/) has been dedicated to different time-series databases. ClickHouse is not really a specialized time-series database but still outperforms many alternatives if used as such. So Dmitry Andreev, Head of Yandex.Market Infrastructure Development Group, had a short talk about how ClickHouse can be used as a storage backend for Graphite using [Graphouse](https://github.com/clickhouse/graphouse), an open-source adapter that implements this. This setup is used in Yandex.Market and number of other Yandex services and have proven to be very reliable and effective. Chain of short talks has been followed by a live panel about time series in general with the same speakers including Dmitry. Unfortunately, as we figured out later, many keynote attendees perceived ClickHouse as just yet another time-series database and missed the explicitly said part that it opens up way more opportunities to analyze data. -2. Victor Tarnavsky, Head of Yandex.Metrica, and Alexey Milovidov, Head of ClickHouse Development Group, gave a full-length talk about ClickHouse overview, capabilities, features and use cases. Their video has not been recorded, but you can check out [the slides](https://presentations.clickhouse.com/percona2017/ClickHouse%20Percona%20Santa%20Clara%202.0.pdf). -3. Later on, Dmitry Andreev went deeper on the same topic he covered on an opening keynote. He spoke in more detail about how Graphouse works, shown the benchmark results and future plans of the project. Also, [only slides](https://www.percona.com/live/17/sites/default/files/slides/clickhouse-as-timeseries-database.pdf) are available. 
- -![Keynote](https://blog-images.clickhouse.com/en/2017/clickhouse-at-percona-live-2017/2.gif) - -Besides, ClickHouse has been represented in the exhibition accompanying the conference. Altinity, the private company independent from Yandex that provides consulting and support services for ClickHouse, organized the booth and invited Yandex team members to join them to talk about ClickHouse with conference attendees which appeared to be quite productive. - -![ClickHouse Booth](https://blog-images.clickhouse.com/en/2017/clickhouse-at-percona-live-2017/3.jpg) diff --git a/website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md b/website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md deleted file mode 100644 index b882cf81fa2..00000000000 --- a/website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse Meetup in Berlin, October 5, 2017' -image: 'https://blog-images.clickhouse.com/en/2017/clickhouse-meetup-in-berlin-october-5-2017/main.jpg' -date: '2017-10-19' -tags: ['meetup', 'Berlin', 'Germany', 'events'] ---- - -![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3hO_3kCUFZLdcIQuI3gghZ8) - -All presentations are available for download at [the event page](https://events.yandex.com/events/meetings/05-10-2017/). 
diff --git a/website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md b/website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md deleted file mode 100644 index 832cf476b6a..00000000000 --- a/website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: 'ClickHouse Meetup in Santa Clara on May 4, 2017' -image: 'https://blog-images.clickhouse.com/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017/main.jpg' -date: '2017-05-11' -tags: ['meetup', 'Santa Clara', 'Bay Area', 'California', 'USA', 'America', 'events'] ---- - -After [Percona Live 2017](clickhouse-at-percona-live-2017.md), ClickHouse team stayed for one more week in San Francisco Bay Area to meet with local companies in person to talk about ClickHouse and how it can be applied to their tasks. On the last evening we even managed to organize our own meetup with active ClickHouse users in the area, not as large as we regularly host in Russia, but still had some very interesting discussions. diff --git a/website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md b/website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md deleted file mode 100644 index d9ba8890ad0..00000000000 --- a/website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: 'Join the ClickHouse Meetup in Berlin' -image: 'https://blog-images.clickhouse.com/en/2017/join-the-clickhouse-meetup-in-berlin/main.jpg' -date: '2017-10-19' -tags: ['announcement', 'meetup', 'Berlin', 'Germany', 'events'] ---- - -Come learn about ClickHouse, our open source high-performance column-oriented database management system at a meetup on October 5, 2017 at the Park Inn at Alexanderplatz 7 in Berlin. - -ClickHouse can generate custom data reports in real time and process billions of rows and dozens of gigabytes of data per single server per second. It works up to a thousand times faster than traditional approaches. 
ClickHouse is linearly scalable, hardware-efficient, fault-tolerant, and can be deployed across multiple data centers. Among other features, ClickHouse offers a user-friendly SQL query dialect with a number of built-in analytics capabilities. - -Join us at the meetup to learn why hundreds of companies across Europe, US, and China are adopting ClickHouse. Through interactive talks, attendees will learn about product features, how ClickHouse can benefit them, and how to use this system in practice. -Attending the ClickHouse meetup is free. [Please register to join us](https://events.yandex.com/events/meetings/05-10-2017/). diff --git a/website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md b/website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md deleted file mode 100644 index 421f03ae226..00000000000 --- a/website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: 'Announcing ClickHouse Meetup in Amsterdam on November 15' -image: 'https://blog-images.clickhouse.com/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15/main.jpg' -date: '2018-10-17' -tags: ['meetup', 'Amsterdam', 'Netherlands', 'events', 'announcement'] ---- - -Yet another meetup of ClickHouse community is planned in Europe, see detailed agenda and register on [the event page](https://events.yandex.com/events/meetings/15-11-2018/). 
diff --git a/website/blog/en/2018/clickhouse-at-analysys-a10-2018.md b/website/blog/en/2018/clickhouse-at-analysys-a10-2018.md deleted file mode 100644 index 6c111420497..00000000000 --- a/website/blog/en/2018/clickhouse-at-analysys-a10-2018.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: 'ClickHouse at Analysys A10 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-at-analysys-a10-2018/main.jpg' -date: '2018-11-04' -tags: ['conference', 'Beijing', 'China', 'events', 'Analysys', 'Asia'] ---- - -[Analysys A10](https://event.analysys.cn/pc/2018/index.html) is a large conference on Big Data that took place on October 26-27 in Beijing. Since China's population is huge, it generates a lot of data and Big Data industry is in very high demand. ClickHouse team has been honored to participate in this event alongside top management, analysts, and IT professionals from various Chinese companies. - -Each year Analysys also organizes the OLAP contest. The second year in a row the same team of Sundy Li (李本旺) and Winter Zhang (张健) wins it by using ClickHouse as the core of their solution. The task was to calculate complex marketing funnel as fast as possible. - -Sundy Li (李本旺) receives award for winning Analysys OLAP contest 2018 from William Kwok (郭炜): -![Sundy Li and William Kwok](https://blog-images.clickhouse.com/en/2018/clickhouse-at-analysys-a10-2018/1.jpg) - -The first day of the conference we mostly spent talking with people on ClickHouse booth, while on the second day there were two technical talks about ClickHouse. 
- -Alexey Milovidov demonstrates ClickHouse and how it works internally: -![Alexey Milovidov](https://blog-images.clickhouse.com/en/2018/clickhouse-at-analysys-a10-2018/2.jpg) - -Sundy Li (李本旺) explains the audience how they won the OLAP contest using ClickHouse: -![Sundy Li](https://blog-images.clickhouse.com/en/2018/clickhouse-at-analysys-a10-2018/3.jpg) - -The next day after A10 was a dedicated ClickHouse Community Meetup in Beijing, but it deserves a separate recap post. - -Analysys A10 afterparty: -![Analysys A10 afterparty](https://blog-images.clickhouse.com/en/2018/clickhouse-at-analysys-a10-2018/4.jpg) - diff --git a/website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md b/website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md deleted file mode 100644 index 0fc8660c92c..00000000000 --- a/website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: 'ClickHouse at Percona Live Europe 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-at-percona-live-europe-2018/main.jpg' -date: '2018-11-21' -tags: ['conference', 'Frankfurt', 'Germany', 'events', 'Percona Live', 'Europe'] ---- - -Open-source database management systems conference [Percona Live](https://www.percona.com/live/e18/) this time took place on November 5-7 in Germany, Frankfurt am Main. Over last couple years ClickHouse became a solid member of this community as demand in analytics with subsecond latencies appeared to be pretty high. - -There were three talks about ClickHouse in agenda, while only one of them was from Yandex. Also we had a lot of interesting conversations with conference attendees at ClickHouse booth sponsored by Altinity. 
- -Alexander Zaitsev, CTO and co-founder of Altinity, gives an overview of ClickHouse and then demonstrates case studies and best practices ([slides](https://presentations.clickhouse.com/percona_europe_2018/Altinity.pdf)): -![](https://blog-images.clickhouse.com/en/2018/clickhouse-at-percona-live-europe-2018/1.jpg) - -Fast! Flexible! Free! Fun! -![Fast! Flexible! Free! Fun!](https://blog-images.clickhouse.com/en/2018/clickhouse-at-percona-live-europe-2018/2.jpg) - -Aleksey Milovidov, lead ClickHouse developer from Yandex, talks about unusual and unique ClickHouse features ([slides](https://presentations.clickhouse.com/percona_europe_2018)): -![Aleksey Milovidov](https://blog-images.clickhouse.com/en/2018/clickhouse-at-percona-live-europe-2018/3.jpg) - -Aleksandar Aleksandrov and Felix Mattrat, data engineers from MessageBird, show how they use ClickHouse to analyze process of delivery of SMS and other kinds of messages ([slides](http://presentations.clickhouse.com/percona_europe_2018/MessageBird.pdf)): -![Aleksandar Aleksandrov and Felix Mattrat](https://blog-images.clickhouse.com/en/2018/clickhouse-at-percona-live-europe-2018/4.jpg) - -Live demo at ClickHouse booth by Alexey Milovidov: -![Demo at ClickHouse booth by Alexey Milovidov](https://blog-images.clickhouse.com/en/2018/clickhouse-at-percona-live-europe-2018/5.jpg) diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md deleted file mode 100644 index 63293f1ff67..00000000000 --- a/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: 'ClickHouse Community Meetup in Beijing on January 27, 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/main.jpg' -date: '2018-02-08' -tags: ['meetup', 'Beijing', 'China', 'events', 'Asia'] ---- - -Last year there has 
been an OLAP algorithm contest in China organized by Analysys. The team who have shown the top results and won the competition has been using ClickHouse as the core of their solution. Other teams were mostly using different technologies and didn't really know much about ClickHouse at a time. When the final results were published, many people in China who participated in or were aware of this competition became really eager to learn more about ClickHouse. This spike of interest about ClickHouse in China has eventually lead to the first Chinese ClickHouse Community Meetup that has taken place in Beijing. - -Welcome word by William Kwok, CTO of Analysys, who personally played a huge role in making this event possible: -![William Kwok, CTO of Analysys](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/1.jpg) - -It was probably the most intense ClickHouse Meetup compared to all previous ones worldwide. The main part of the event took over 6 hours non-stop and there were also either pre-meetup and after-party on the same day. Well over 150 people have shown up on Saturday to participate. - -Audience listening for ClickHouse introduction by Alexey Milovidov: -![ClickHouse introduction by Alexey Milovidov](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/2.jpg) - -Alexey Milovidov has started the main meetup session with an introductory talk about ClickHouse, it's usage inside Yandex and history that lead to becoming an open-source analytical DBMS ([slides](https://presentations.clickhouse.com/meetup12/introduction/)). - -Alexander Zaitsev's practical talk about migrating to ClickHouse: -![Alexander Zaitsev](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/3.jpg) - -Alexander Zaitsev has shared his vast experience in migrating to ClickHouse. 
LifeStreet, advertisement company where he works, was one of the first companies outside of Yandex which switched to ClickHouse from other analytical DBMS in production. Later on, Alexander also co-founded Altinity, a company that specializes in helping others to migrate to ClickHouse and then effectively use it to achieve their business goals. The talk has covered many specific topics that are important for those who are in the middle of such migration or just considering it ([Slides](https://presentations.clickhouse.com/meetup12/migration.pptx)). - -Alexey Zatelepin explaining how ClickHouse sparse index works and other implementation details: -![Alexey Zatelepin](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/4.jpg) - -Alexey Zatelepin's technical talk was focused on providing engineers some insights on why ClickHouse is that fast in OLAP workloads and how to leverage its design and core features as a primary index, replication, and distributed tables to achieve great performance and reliability ([slides](https://presentations.clickhouse.com/meetup12/internals.pdf)). - -Jack Gao gives an extensive overview of ClickHouse and it's use cases in Chinese: -![Jack Gao](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/5.jpg) - -As we have learned during meet up and the rest of our business trip, actually there are many companies in China that are already using or seriously evaluating ClickHouse to use either part of their products or for internal analytics. Three of them are doing this long and extensively enough to give a full talk about their progress and experience. - -In China, in general, and especially in Beijing the knowledge of English is not really common. Chinese people working in the IT industry have to know English well enough to read documentation, but it does not really imply that they can talk or understand verbal English well. 
So the talks by representatives of local companies were in Chinese. - -Jack Gao, ex-DBA and now an analyst at Sina (major social network) have dedicated a significant part of his talk to go over fundamental topics essential to most ClickHouse users. It partially overlapped with previous talks, but this time in Chinese. Also, he covered not only use case of ClickHouse in Sina but also other publicly known cases by other companies. Considering the reaction of the audience, it has been the most useful talk of the whole meetup, because of the widely useful content, lack of language barrier, and excellent execution of presentation. We even had to sacrifice initially scheduled a short break to give Jack some additional time ([slides](https://presentations.clickhouse.com/meetup12/power_your_data.pdf)). - -Yang Xujun from Dataliance / UltraPower, which provides outsourced data analysis platform to telecom companies in China, have demonstrated why they decided to move away from reports prepared offline in Apache Hadoop / Spark and exported to MySQL towards ClickHouse. In short: Hadoop is too slow and cumbersome ([slides](https://presentations.clickhouse.com/meetup12/telecom.pdf)). - -It might sound obvious, but the huge Chinese population generates insane amounts of data to store and process. So IT companies operating mostly on the local Chinese market are often handling amounts of information comparable to even the largest global companies. - -Kent Wang from Splunk Shanghai R&D center has demonstrated the current state of ClickHouse integration into Splunk ecosystem. Basically, they have plugged ClickHouse into their system via JDBC driver to allow data from ClickHouse to be easily accessed in Splunk UI and dashboards. 
Last spring ClickHouse team actually had a friendly visit to Splunk office in San Francisco to discuss potential points of interaction and exchange experience, so it was great to hear that there's some real progress in that direction ([slides](https://presentations.clickhouse.com/meetup12/splunk.pdf)). - -The last talk was for the most tenacious ClickHouse users. Alexey Milovidov has announced some recently released features and improvements and shared what's coming next both in the short and long term ([slides](https://presentations.clickhouse.com/meetup12/news_and_plans/)). - -Here is an over 5 hours long video recording of main meetup session: - -![iframe](https://www.youtube.com/embed/UXw8izZGPGk) - -If you are from China or at least can read Chinese, you might consider joining the **[Chinese ClickHouse User Group](http://www.clickhouse.com.cn/)**. - -{## Likely outdated in favor of YouTube - -There is an over 5 hours long video recording of main meetup session, but it'll take a bit of effort to get access to it (especially if you are not from China): http://m.zm518.cn/zhangmen/livenumber/share/entry/?liveId=1460023&sharerId=6fd3bac16125e71d69-899&circleId=b0b78915b2edbfe6c-78f7&followerId=&timestamp=1517022274560 -You'll need to install WeChat (probably one of the most popular messengers in the world, everyone in China has it) on your smartphone: Android or iOS. https://play.google.com/store/apps/details?id=com.tencent.mm https://itunes.apple.com/ru/app/wechat/id414478124?mt=8 -On the first launch, WeChat will ask to confirm your phone number via SMS, read some digits via a microphone and accept the user agreement. Go through this. -On your computer, click the red button in the middle of the video behind the link above. It'll show a QR code. Now in WeChat in the top-right corner, there's the “+” button which opens a menu that has a “Scan QR code” item. Use it to scan QR code from your computer screen, then press the “Sign in” button on the smartphone.
Now the video on the computer automatically becomes playable. -If you are from China or at least can read Chinese, you might consider joining the Chinese ClickHouse User Group. - -ClickHouse Community Meetup afterparty. -##} - -Pre-meetup meeting of speakers and most active ClickHouse users in China: -![Pre-meetup meeting](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/6.jpg) - -ClickHouse Community Meetup afterparty: -![ClickHouse Community Meetup afterparty](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/7.jpg) diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md deleted file mode 100644 index 5934e34cb25..00000000000 --- a/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: 'ClickHouse Community Meetup in Beijing on October 28, 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/main.jpg' -date: '2018-11-12' -tags: ['meetup', 'Beijing', 'China', 'Asia', 'events'] ---- - -Interest in ClickHouse among Chinese experts is growing rapidly. It was second ClickHouse Meetup in Beijing this year and the venue was more than full, it could fit only about 170 people out of 500 who signed up and around 2000 more joined the live translation online. Many Chinese companies have already adopted ClickHouse in production and are willing to share their experience. - -See the **[video recording of all talks](http://play.yunxi.tv/livestream/flash?id=05527cf6e260448b9d880b99d2cf4d40)** and **[all slides](https://github.com/ClickHouse/clickhouse-presentations/tree/master/meetup19)**. 
- -Welcome word by William Kwok (郭炜), CTO of Analysys, who played a key role in organizing this event: -![William Kwok](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/1.jpg) - -Nikolay Kochetov from Yandex demonstrating recent advancements in string processing optimization using LowCardinality feature: -![Nikolay Kochetov from Yandex](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/2.jpg) - -Shang Shujie (尚书杰) from Kuaishou gives an overview of ClickHouse and its usage scenarios: -![Shang Shujie](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/3.jpg) - -Winter Zhang (张健) from QingCloud explains their services based on ClickHouse: -![Winter Zhang](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/4.jpg) - -Audience listening to Zhang's talk: -![Audience listening to Zhang's talk](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/5.jpg) - -Li Junfei (李俊飞) from Tencent explains how ClickHouse fits their data processing infrastructure: -![Li Junfei](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/6.jpg) - -Questions&Answers session: -![Q&A](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/7.jpg) - -Jack Gao (高鹏) from Sina explains their ClickHouse use case and gives some advice based on their extensive experience with ClickHouse: -![Jack Gao](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/8.jpg) - -Chinese developers are also one of the most active worldwide in contributing to ClickHouse source code compared to other countries. Chinese ClickHouse Contributors Awards 2018 ceremony was also part of the meetup agenda with the following: - -1.
张建 (Winter Zhang, zhang2014) received First Place among independent ClickHouse developers in China for 2018, having developed 22 new features, improvements, and fixes in 57 pull requests. -2. Amos Bird received Second Place among independent ClickHouse developers in China for 2018, having developed 16 new features, improvements, and fixes in 42 pull requests. -3. 李本旺 (sundy-li) received Third Place among independent ClickHouse developers in China for 2018, having developed 6 new features, improvements, and fixes in 11 pull requests. - -A special award went to William Kwok (郭炜) for his active role in developing the Chinese ClickHouse Community. - -Sundy Li (李本旺) receives ClickHouse Contributor Award from Alexey Milovidov: -![Sundy Li](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/9.jpg) - -William Kwok (郭炜) receives special award for organizing Chinese ClickHouse community and meetups: -![William Kwok](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/10.jpg) - -Pre-meetup at the Analysys office: -![Pre-meetup](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/11.jpg) - - - diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md deleted file mode 100644 index 2f86859c3f9..00000000000 --- a/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: 'ClickHouse Community Meetup in Berlin on July 3, 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018/main.jpg' -date: '2018-07-05' -tags: ['meetup', 'Berlin', 'Germany', 'events'] ---- - -Just a few months ago Brenno Oliveira from Delivery Hero has dropped us an email saying that they want to host a meetup of ClickHouse community in their 
HQ and together we made it happen. Actually, renting a suitable room is one of the main limiting factors on how often ClickHouse meetups can happen worldwide and it was very kind of Delivery Hero to provide it for free. Bringing interesting speakers was the easy part as there are more and more companies adopting ClickHouse and willing to share their stories. Being an open-source product has its advantages after all. About 50 people have shown up from 75 sign-ups, which is way above the typical rate. - -To get started Alexander Zaitsev from Altinity gave an overview of ClickHouse for those who are not that familiar with the technology yet. He was using use cases from his personal experience and their clients as examples. Here are [the slides](https://presentations.clickhouse.com/meetup16/introduction.pdf), unfortunately, no video this time. - -Gleb Kanterov talking about the usage of ClickHouse for experimentation metrics at Spotify: -![Gleb Kanterov Spotify](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018/1.jpg) - -![Gleb Kanterov Spotify](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018/2.jpg) - -Spotify relies heavily on what Google Cloud Platform provides, but nevertheless found a spot in their infrastructure where only ClickHouse appeared to satisfy the requirements. Gleb Kanterov has demonstrated their approach to conducting experiments and measuring if they are worth being promoted to production solutions. Using ClickHouse has allowed them to build a framework scalable to thousands of metrics, which in the end makes them move even faster and break fewer things. 
Checking out [full slides](https://presentations.clickhouse.com/meetup16/spotify.pdf) is highly recommended and here are a few quotes: - -- **Requirements** - - Serve 100-s of QPS with sub-second latency - - We know in advance what are queries and data - - Maintain 10x metrics with the same cost - - Thousands of metrics - - Billions of rows per day in each of 100-s of tables - - Ready to be used out of the box - - Leverage existing infrastructure as much as feasible - - Hide unnecessary complexity from internal users -- **Why ClickHouse?** - - Build proof of concept using various OLAP storages (ClickHouse, Druid, Pinot,...) - - ClickHouse has the most simple architecture - - Powerful SQL dialect close to Standard SQL - - A comprehensive set of built-in functions and aggregators - - Was ready to be used out of the box - - Superset integration is great - - Easy to query using clickhouse-jdbc and jooq - -The last talk by Alexey Milovidov was pretty technical and mostly intended for a deeper understanding of what's going on inside ClickHouse, see [the slides](https://presentations.clickhouse.com/meetup16/internals.pdf). There were many experienced users in the audience who didn't mind staying late to hear that and ask very relevant questions. Actually, we had to leave the building way before people were out of topics to discuss. - -If your company regularly hosts technical meetups and you are looking for interesting topics to talk about, ClickHouse might be in pretty high demand. Feel free to write ClickHouse team via [this form](http://clickhouse.com/#meet) if you are interested to host a similar event in your city and we'll find a way to cooperate and bring in other ClickHouse community members. 
diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md b/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md deleted file mode 100644 index beb06f1f3d4..00000000000 --- a/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: 'Announcing ClickHouse Community Meetup in Berlin on July 3' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-berlin-on-july-3/main.jpg' -date: '2018-06-25' -tags: ['meetup', 'Berlin', 'Germany', 'events', 'announcement'] ---- - -There's yet another upcoming meetup of ClickHouse community in Europe, see detailed agenda and sign up on [the event page](https://bitly.com/2Jv9Bug). diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md deleted file mode 100644 index f94d2de411c..00000000000 --- a/website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: 'ClickHouse Community Meetup in Paris on October 2, 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/main.jpg' -date: '2018-10-09' -tags: ['meetup', 'Paris', 'France', 'events'] ---- - -Agenda of Paris ClickHouse Meetup was full of use cases, mostly from France-based companies which are actively using ClickHouse. Slides for all talks are [available on the GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup18). 
- -Christophe Kalenzaga and Vianney Foucault, engineers from Contentsquare, company that provided the meetup venue: -![Christophe Kalenzaga and Vianney Foucault](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/1.jpg) - -Matthieu Jacquet from Storetail (Criteo): -![Matthieu Jacquet](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/2.jpg) - -The audience: -![Audience](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/3.jpg) - -Networking after the meetup: -![Networking](https://blog-images.clickhouse.com/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/4.jpg) diff --git a/website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md b/website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md deleted file mode 100644 index ad408d51658..00000000000 --- a/website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: 'ClickHouse Meetup in Amsterdam on November 15, 2018' -image: 'https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/main.jpg' -date: '2018-11-22' -tags: ['meetup', 'Amsterdam', 'Netherlands', 'events'] ---- - -20th ClickHouse Meetup took place in Amsterdam, which appeared to be a convenient location for people from all over Europe to join the event, including Austria, Czech Republic and Germany. We were also glad to see people from many local companies including Booking.com, Crobox, Marktplaats (eBay), MessageBird and others. 
- -Aleksandar Aleksandrov and Felix Mattrat, data engineers from MessageBird, show how they use ClickHouse to analyze process of delivery of SMS and other kinds of messages: -![Aleksandar Aleksandrov and Felix Mattrat](https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/1.jpg) - -Nikolay Kochetov from ClickHouse team demonstrates recent features related to string processing optimization: -![Nikolay Kochetov from ClickHouse team](https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/2.jpg) - -Konstantin Ignatov from Qrator Labs shares his experience in using ClickHouse as time-series database: -![Konstantin Ignatov from Qrator Labs](https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/3.jpg) - -Aurimas Jacikevicius from Altinity demonstrates benchmark of ClickHouse against TimescaleDB and InfluxDB under time-series workload: -![Aurimas Jacikevicius from Altinity](https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/4.jpg) - -Roy Brondgeest from Crobox showcases [ClickHouse Scala reactive client](https://github.com/crobox/clickhouse-scala-client) and its bundled [DSL for query building](https://github.com/crobox/clickhouse-scala-client/wiki): -![Roy Brondgeest from Crobox](https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/5.jpg) - -Alexey Milovidov from Yandex closes the meetup with talk about performance analysis of ClickHouse queries: -![Alexey Milovidov from Yandex](https://blog-images.clickhouse.com/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/6.jpg) - diff --git a/website/blog/en/2018/concept-cloud-mergetree-tables.md b/website/blog/en/2018/concept-cloud-mergetree-tables.md deleted file mode 100644 index 4d19a3299b3..00000000000 --- a/website/blog/en/2018/concept-cloud-mergetree-tables.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: 'Concept:
"Cloud" MergeTree Tables' -image: 'https://blog-images.clickhouse.com/en/2018/concept-cloud-mergetree-tables/main.jpg' -date: '2018-11-23' -tags: ['concept', 'MergeTree', 'future', 'sharding'] ---- - -The main property of the MergeTree cloud tables is the absence of manual control over the sharding scheme of data on a cluster. The data in the cloud tables are distributed around the cluster on its own, while at the same time providing the locality property for a certain key. - -## Requirements - -1. Creating a cloud table makes it visible on all nodes of the cluster. No need to manually create a separate Distributed table and local tables on each node. -2. When ingesting data to a cloud table, while the table is very small, data is distributed across several cluster servers, but as data grows, more servers are involved (for example, starting from gigabytes per server). The user can create a small table and it should not be too cumbersome; but when creating a table, we do not know in advance how much data will be loaded into it. -3. The user specifies a sharding key (arbitrary tuple). Data for the sharding key range (in lexicographical order) is located on some servers. Very small ranges are located on several servers and to access it is enough to read data from a single server, while sufficiently large ranges are spread across all servers. For example, if we are talking about web analytics the sharding key might start with CounterID, the website identifier. Data on a large site like https://yandex.ru should be spread across all servers in the cluster, while data on a small site should be located on only a few servers. Physical explanation: the cluster should scale to simultaneously provide throughput for heavy queries and to handle high QPS of light queries, and for light queries, the latency should not suffer. In general, this is called data locality. -4. 
The ability for heavy queries to use all the servers in the cluster, rather than 1 / N, where N is the replication coefficient. Thus, one server can contain multiple replicas of different shards. -5. When replacing the server with an empty one (node recovery), the data restore must be parallelized in some way. At least the reads should be spread over different servers to avoid overloading individual servers. -6. On each local server, reading the range of the primary key should be touching not a very large number of file ranges or not too small file ranges (minimizing disk seeks). -7. (Optional) The ability to use individual disks instead of RAID, but at the same time preserving throughput when reading medium-sized primary key ranges and preserving QPS when reading small-sized ranges. -8. The ability to create multiple tables with a common sharding scheme (co-sharding). -9. Rebalancing data when adding new servers; creation of additional replicas with long unavailability of old servers. -10. SELECT queries should not require synchronous requests to the coordinator. No duplicates or missing data visible by SELECT queries during data rebalancing operations. -11. SELECT queries must choose large enough subset of servers considering conditions on sharding key and knowledge of the current sharding scheme. -12. The ability to efficiently distribute data across servers with uneven available disk space. -13. Atomicity of INSERT on a cluster. - -Out of scope and will not be considered: - -1. Erasure data encoding for replication and recovery. -2. Data storage on systems with different disks - HDD and SSD. An example is storing fresh data on an SSD. - -## General Considerations - -A similar problem usually (in Map-Reduce or blob-storage systems) is solved by organizing data in chunks. Chunks are located on the nodes of the cluster. Mappings: table or file -> chunks, chunk -> nodes, are stored in the master, which itself can be replicated. 
The master observes the liveliness of nodes and maintains a reasonable replication level of all chunks. - -Difficulties arise when there are too many chunks: in this case, the master does not cope with the storage of metadata and with the load. It becomes necessary to make complicated metadata sharding. - -In our case, it may seem tempting to solve a problem in a similar way, where instead of a chunk, an instance of a MergeTree type table containing the data range is used. Chunks in other systems are called “tablets” or “regions”. But there are many problems with this. The number of chunks on one server cannot be large, because then the property is violated - minimizing the number of seeks when reading data ranges. The problem also arises from the fact that each MergeTree table itself is rather cumbersome and consists of a large number of files. On the other hand, tables with a size of one terabyte are more or less normal if the data locality property is maintained. That is if several such tables on one server begin to be used only for not too small data ranges. - -A variety of options can be used for sharding data, including: -Sharding according to some formula with a small number of parameters. Examples are simple hashing, consistent hashing (hash ring, rendezvous hashing, jump consistent hashing, sumbur). The practice of using in other systems shows that in its pure form this approach does not work well, because the sharding scheme is poorly controlled. Fits fine, for example, for caches. It can also be used as part of another algorithm. - -The opposite option is that the data is divided into shards using an explicitly specified table. The table may contain key ranges (or, in another case, hash ranges from keys) and their corresponding servers. This gives a much greater degree of freedom in choosing when and how to transfer data. But at the same time, to scale the cluster, the size of the table has to be dynamically expanded, breaking the existing ranges. 
- -One of the combined options is that the mapping is made up of two parts: first, the set of various keys is divided into some pre-fixed not too few and not too many “virtual shards” (you can also call “logical shards”, “mini-shards”). This number is several times larger than the hypothetical cluster size in the number of servers. Further, the second mapping explicitly specifies the location of each mini-shard on the servers, and this second mapping can be controlled arbitrarily. - -The complexity of this approach is that partitioning hash ranges gives uniformity, but does not give locality of data for range queries; whereas when splitting by key ranges, it is difficult to choose a uniform distribution in advance since we do not know what the distribution of data will be to the keys. That is, the approach with the choice of a pre-fixed split into mini-shards does not work if data locality is required. - -It turns out that the only acceptable approach in our case is partitioning by key ranges, which can change dynamically (repartitioned). At the same time, for more convenience, manageability, and uniformity of data distribution, the number of partitioning elements can be slightly larger than the number of servers, and the mapping from the partitioning element into servers can be changed separately. - -## Possible Implementation - -Each ClickHouse server can participate in a certain cloud. The cloud is identified by a text string. The membership of a node in the cloud can be ensured by creating a certain type of database on the node (IDatabase). Thus, one node can be registered in several clouds. Registry of the nodes registered in the cloud is maintained in the coordinator. - -Cloud nodes are selected to accommodate the replicas of the shards of cloud tables. 
The node also sends some additional information to the coordinator for its selection when placing data: the path that determines the locality in the network (for example, data center and rack), the amount of disk space, etc. - -The cloud table is created in the corresponding database registered in the cloud. The table is created on any server and is visible in all databases registered in the cloud. - -Sharding key is set for cloud table on it“s creation, an arbitrary tuple. Sometimes it is practical that the sharding key matches the primary key (example - (CounterID, Date, UserID)), sometimes it makes sense that it is different (for example, the DateTime primary key, sharding key - UserID). - -Sharding is a composition of several mappings: - -1. The set of all possible tuples, the values ​​of the sharding key, is mapped onto many half-intervals that break the half-interval [0, 1). Initially, this number is the size of the partition, it is equal to 1. That is, all values ​​are mapped into a single semi-interval, the whole set [0, 1). Then, as the amount of data in the table increases, the semi-intervals, the split elements, can be divided approximately in half by the median of the distribution of values ​​in lexicographical order. -2. For each half-interval splitting, several cloud servers are selected and remembered in some way, on which replicas of the corresponding data will be located. The choice is made based on the location of servers on the network (for example, at least two replicas in different data centers and all replicas in different racks), the number of replicas already created on this server (choose servers with the minimum) and the amount of free space (from various servers just select the server with the maximum amount of free space). - -As a result, this composition forms a mapping from the sharding key into several replica servers. - -It is assumed that in the course of work both parts of this mapping may change. 
- -The result of mapping 1 can be called the “virtual shard” or “logical shard”. In the process of work, virtual shards can be divided in half. Going in the opposite direction is impossible - the number of virtual shards can only grow. It is assumed that even for tables occupying the entire cluster, the number of virtual shards will be several times larger than the number of servers (for example, it may be greater by 10 times the replication ratio). Data ranges occupying at least a tenth of all data should be spread across all servers to ensure throughput of heavy queries. The mapping as a whole is specified by the set of boundary values ​​for the sharding key. This set is small (roughly kilobytes) and stored in the coordinator. - -The mapping of virtual shards on real servers can change arbitrarily: the number of replicas can increase when servers are not available for a long time or increase and then decrease to move replicas between servers. -## How to Satisfy All Requirements - -List items below correspond to the requirement numbers above: - -1. IDatabase synchronously goes to the coordinator to get or change the list of tables. The list of cloud tables is stored in the coordinator in the node corresponding to the cloud. That is, all the tables in the cloud are visible on each server entering the cloud. -2. It is ensured by the fact that initially the partition consists of a single element, but begins to break up further with increasing data volume. Each replica responsible for the local storage of this data can initiate the splitting, once the criterion for the data volume has been reached. Multiple replicas may decide to do this competitively, and the decision is made using atomic CAS. To have fewer problems, it is possible to randomize somewhat the moment of deciding repartition. The criterion when it is necessary to additionally break virtual shards turns out to be non-trivial. 
For example, you can break up to the number of servers * the replication rate quite soon, by growing a shard to several gigabytes. But it is already worth breaking shards even when shards are 1 / N in size from the server size (for example, around a terabyte). In coordinator, you should store the last and previous splits immediately and do not do the splitting too often. -3. It is ensured by the fact that the number of virtual shards will be several times (user-defined) more than the number of servers. Note: for additional data spreading, you can impose some spreading transformation on the sharding key. Not thought out. For example, instead of a key (CounterID, Date, UserID) use for sharding (hash (UserID)% 10, CounterID, Date, UserID). But in this case, even small CounterIDs will fall into 10 ranges. -4. Similarly. -5. If several virtual shards are located on a single server, their replicas will be spread over a larger number of servers, and during recovery, there will be more fanout. -6. Small requests will use one shard. While large requests will use several shards on the same server. But since each shard will be somewhat smaller, the data in the MergeTree table will probably be presented by a smaller set of parts. For example, we now have a maximum part size of 150 GiB, and for large tables, many such large chunks are formed in one partition. And if there are several tables, there will be a smaller number of large chunks in each. On the other hand, when inserting data, a larger number of small pieces will be generated on each server. And these small parts will cause an increase in the number of seeks. But not much, as the fresh data will be in the page cache. That is why too many virtual shards per server might not work well. -7. Pretty hard. You can have groups of neighboring shards on different disks of the same server. But then reading of medium size ranges will not be parallelized (since the whole range will be on one disk). 
In RAID, the problem is solved by the fact that the size of the chunk is relatively small (typically 1 megabyte). It would be possible to come up with a separate distribution of data in different pieces on different disks. But it is too difficult to design and implement carefully. Probably it“s better not to do the whole thing, and as a minimum, make it so that when on the JBOD server, one server disk is selected for the location of one shard. -8. It is possible to identify the sharding scheme with a string, which may be common to different tables. The criterion for splitting shards is determined based on the total amount of data for all tables with the same sharding scheme. -9. It is solved completely by changing the mapping of virtual shards on the servers. This mapping can be controlled independently of everything else. -10. Servers can cache the sharding map (both parts of it) for a while and update it usually asynchronously. When rebalancing data due to the splitting of virtual shards, you should keep the old data for a longer time. Similarly, when transferring replicas between servers. Upon request, the initiator server also asks if the remote server has the necessary data: data for the required shard according to the sharding scheme that is cached by the initiator server. For the query, one live replica of each shard is selected, on which there is data. If suddenly there were none, then it is worthwhile to update the sharding map synchronously, as for some reason all the replicas were transferred somewhere. -11. It is trivial. -12. It is solved on the basis that more than one shard accounts for one server and the fact that the distribution of shards replicas among servers is more or less arbitrary and can take into account the amount of disk space. -## Issues - -To ingest data into a table, you can send an INSERT query to any server. The data will be divided into ranges and recorded on the desired servers. 
At the same time, it is synchronously ensured that we use a fresh sharding map - it is requested before the data is inserted and it is checked that it is not out of date, simultaneously with the commit in ZK. - -When a SELECT query is used, if the old sharding map was used, the latest data will not be visible. Therefore, the asynchronous update interval of the sharding map for SELECT should be made customizable, and an option should be added to synchronously use the latest sharding map. - -For fairly large tables, it turns out that an INSERT request breaks the data into many small pieces and writes to all servers (example: with 500 servers, you need to commit 5000 replicas of shards). This should work since the probability of inaccessibility or inhibition of all replicas of one shard is still low. But it will work slowly and, possibly, unstable. With a lot of INSERTs, there will be a terrible load on the coordinator. Although it can withstand one INSERT per second normally. To achieve high throughput of INSERTs, it is sufficient to simply make them parallel, but with the same low frequency of INSERTs in general. However, this is still a big problem. - -There are the following possible solutions: - -1. You can add something to the beginning of the sharding key. For example, Date % 10 or toMinute. Then INSERTs will touch fewer shards (in the typical case when recent data is inserted), but at the same time during some time intervals, some shards will be hotter than others. Normally, if it reduces the number of active shards, for example, from 5000 on INSERT to 500. It is also very inconvenient for users. -2. You can come up with some kind of incomprehensible sharding scheme, where the fresh data first falls into some fresh shard where it is not clear where from where it is then lazily overwritten. A fresh shard is essentially a distributed queue. At the same time, a fresh shard with SELECT is always requested. Not so good. 
And still, it contradicts the atomicity of these transfers of data, visible at SELECT. Alternatively, you could relax the requirements if you allow SELECT not to see some of the fresh data. -It looks like it“s generally not working well at a cluster size of over 500 servers. -Another problem is that to properly spread the ranges of the primary key, the number of virtual shards must be no less than the number of servers squared. And this is too much. -How to Get Around These Issues -For sharding, you can add some more intermediate mappings. There are the following options: -1. Splitting each shard into a set of shards in an arbitrary way. For example, 10 pieces. This is equivalent to adding a random number 0.N-1 to the beginning of the sharding key, which means nothing. Then with INSERT, you can only insert into one randomly selected shard, or a minimum sized shard, or some kind of round-robin; and as a result, INSERT becomes easier. But this increases the fanout of all point SELECTs. For convenience, such a partition can be done dynamically - only large enough shards can be divided in such a way (this will help avoid excessive splitting of old shards in the case when the sharding key starts with Date and the data is inserted in the Date order) or do such a partition starting from the situation when the number of shards is large enough (restriction on top of fanout INSERT requests). -An additional advantage: in the case of servers with JBOD, it is possible to prefer to place such second-level shards on the disks of one server, which half emulates RAID-0. -But there is a serious drawback: there is no possibility to do local IN / JOIN. For example, this possibility is assumed if the sharding key is hash (UserID), and we do JOIN by UserID. It would be possible to avoid this drawback by always placing all the “symmetric” shards on one server. -2. A mapping that spreads the data while keeping the number of virtual shards. 
The essence of this mapping is as follows: - - The spreading factor is set, for example, `N = 10.` As the very first mapping, 10 times more ranges are generated. For example, if we want to end up with 7 shards, then we divide the data into 70 ranges. - - Then these ranges are renumbered in a circle with numbers from 0.6 and the ranges with the same number will fall into one shard, as a result, there will be 7 shards again. - - The continuous analogue of this mapping: `x in [0, 1) -> fractional_part (x * N)`, multiplication by N on a circle. - -If you draw it on the picture in Cartesian coordinates, you get a “saw” with 10 teeth. - -After this, it becomes obvious that this mapping simultaneously spreads the data and preserves its locality. - -See also: [Arnold's cat map](https://en.wikipedia.org/wiki/Arnold%27s_cat_map). - -But what is described here does not exactly work. First, until a sufficient amount of data has been accumulated, it is impossible to create a uniform division into parts (there is no place to count quantiles). Secondly, according to such a simple scheme, it is impossible to divide the intervals. - -There is an option in which, instead of dividing a range in half, it uses splitting into 4 parts, which are then mapped into two shards. It is also not clear how this will work. 
diff --git a/website/blog/en/2019/clickhouse-at-percona-live-2019.md b/website/blog/en/2019/clickhouse-at-percona-live-2019.md deleted file mode 100644 index 37beb163f1d..00000000000 --- a/website/blog/en/2019/clickhouse-at-percona-live-2019.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: 'ClickHouse at Percona Live 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/main.jpg' -date: '2019-06-04' -tags: ['Percona Live','USA','Texas','Austin', 'events', 'conference'] ---- - -This year American episode of [Percona Live](https://www.percona.com/live/19/) took place in nice waterfront location in Austin, TX, which welcomed open source database experts with pretty hot weather. ClickHouse community is undeniably growing and it became a common database product to give a talk about or at least compare or refer to, while just [two short years ago](../2017/clickhouse-at-percona-live-2017.md) it was more like “wth is ClickHouse?”. - -Alexey Rubin from VirtualHealth compared two column-oriented databases: ClickHouse and MariaDB Column Store. Bottom line was no surprise, ClickHouse is noticeably faster and MariaDB is more familiar for MySQL users, details were useful though. -![Alexey Rubin from VirtualHealth](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/1.jpg) - -Alexey Milovidov from Yandex have demonstrated how exactly ClickHouse became even faster in recent releases. -![Alexey Milovidov from Yandex](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/2.jpg) - -Alexander Zaitsev and Robert Hodges from Altinity have given an entry level tutorial to ClickHouse, which included loading in demo dataset and going through realistic queries against it with some extra variation demonstrating possible query optimization techniques. 
[Slides](https://www.percona.com/live/19/sites/default/files/slides/Making%20HTAP%20Real%20with%20TiFlash%20--%20A%20TiDB%20Native%20Columnar%20Extension%20-%20FileId%20-%20174070.pdf). Also Altinity was sponsoring the ClickHouse booth in Expo Hall which became an easy spot for people interested in ClickHouse to chat outside of talks. -![Alexander Zaitsev and Robert Hodges from Altinity](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/3.jpg) - -Ruoxi Sun from PingCAP introduced TiFlash, column-oriented add-on to TiDB for analytics based on ClickHouse source code. Basically it provides [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree/)-like table engine that is hooked up to TiDB replication and has in-memory row-friendly cache for recent updates. Unfortunately, PingCAP has no plans to bring TiFlash to opensource at the moment. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Making%20HTAP%20Real%20with%20TiFlash%20--%20A%20TiDB%20Native%20Columnar%20Extension%20-%20FileId%20-%20174070.pdf). -![Ruoxi Sun from PingCAP](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/4.jpg) - -ClickHouse has also been covered in talk by Jervin Real and Francisco Bordenave from Percona with overview of moving and replicating data around MySQL-compatible storage solutions. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Replicating%20MySQL%20Data%20to%20TiDB%20For%20Real-Time%20Analytics%20-%20FileId%20-%20187672.pdf). -![Jervin Real](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/5.jpg) - -ClickHouse represented columnar storage systems in venture beyond relational by Marcos Albe from Percona. -![Marcos Albe from Percona](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/6.jpg) - -Jervin Real from Percona have demonstrated real case study of applying ClickHouse in practice. 
It heavily involved manual partitions manipulation, hopefully audience have understood that it is an option, but not exactly a best practice for most use cases. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Low%20Cost%20Transactional%20and%20Analytics%20With%20MySQL%20and%20Clickhouse,%20Have%20Your%20Cake%20and%20Eat%20It%20Too!%20-%20FileId%20-%20187674.pdf). -![Jervin Real from Percona](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/7.jpg) - -Evgeny Potapov from ITSumma went through modern options for time-series storage and once more confirmed ClickHouse is leading the way in this field as well. -![Evgeny Potapov from ITSumma](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/8.jpg) - -Event location in the center of US provided equal opportunities for peoplefrom East and West Coast to show up, but presence of people from other countries was also quite noticeable. The content they all brought in was top notch as usual. -![The venue](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/9.jpg) - -Austin after the Event. 
-![Austin](https://blog-images.clickhouse.com/en/2019/clickhouse-at-percona-live-2019/10.jpg) diff --git a/website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md b/website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md deleted file mode 100644 index f9a156fa54b..00000000000 --- a/website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: 'ClickHouse Lecture at Institute of Computing Technology, Chinese Academy of Science on June 11, 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019/main.jpg' -tags: ['lecture', 'events', 'China', 'Beijing', 'university', 'academy', 'institute'] -date: '2019-06-14' ---- - -Alexey Milovidov, head of ClickHouse development group at Yandex, have given an open two-part lecture at [Institute of Computing Technology, Chinese Academy of Science](http://english.ict.cas.cn/): - -- ClickHouse history and evolution of Yandex.Metrica storage system -- Internal implementation of ClickHouse and reasoning behind design decisions - -The event has been organised by [Amos Bird](https://github.com/amosbird), who is one of the most active ClickHouse community members and contributors, at the same time being a last year PhD student at this institution. 
- -Alexey with the event announcement: -![Alexey with the event announcement](https://blog-images.clickhouse.com/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019/1.jpg) - diff --git a/website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md b/website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md deleted file mode 100644 index 46685db0c37..00000000000 --- a/website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: 'ClickHouse Meetup in Beijing on June 8, 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/main.jpg' -date: '2019-06-13' -tags: ['meetup','Beijing','China','events'] ---- - -24th ClickHouse Meetup globally and 3rd one in China took place in Beijing on Dragon Boat Festival weekend, which appeared to have a rich history and be a popular opportunity for Chinese people to travel around the country. Nevertheless the ClickHouse Meetup venue was more than full as usual, this time kindly provided by Gaea Mobile, with hundreds more people watching live broadcast online. ClickHouse team have extensively used this trip as an opportunity to strengthen the bond with ClickHouse Community in China by also giving an [open lecture in Institute of Computing Technology, Chinese Academy of Science](clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md) and by having a private conversations with the most active local corporate users including ByteDance and JD.com. 
- -Welcome word by William Kwok, CTO of Analysys, who played the key role in making this event in particular possible and also in establishment of ClickHouse Community in China: -![William Kwok, CTO of Analysys](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/1.jpg) - -He's also administering ClickHouse WeChat groups, feel free to ask him for invite (@guodaxia2999 at WeChat): -![@guodaxia2999 at WeChat](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/2.jpg) - -Alexey Milovidov from ClickHouse core developers team at Yandex got the content part of main event part started with overview of new features and roadmap overview: -![Alexey Milovidov](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/3.jpg) - -Amos Bird, one of the most active ClickHouse contributors either in China and worldwide, shares his experience of using ClickHouse for graph processing ([slides](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup24/2.%20SQLGraph%20--%20When%20ClickHouse%20marries%20graph%20processing%20Amoisbird.pdf)): -![Amos Bird](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/4.jpg) - -Yan Gangqiang from Golden Data shares details of their approach to data storage for surveys system based on ClickHouse ([slides](https://presentations.clickhouse.com/meetup24/3.%20金数据数据架构调整方案Public.pdf)): -![Yan Gangqiang](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/5.jpg) - -ClickHouse for beginners talk by Percent ([slides](https://presentations.clickhouse.com/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf)): -![Percent](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/6.jpg) - -ClickHouse core developer Nikolay Kochetov demonstrates upcoming query execution pipeline changes 
([slides](https://presentations.clickhouse.com/meetup24/5.%20Clickhouse%20query%20execution%20pipeline%20changes/)): -![Nikolay Kochetov](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/7.jpg) - -Pre-meetup meeting with active ClickHouse community members in China: -![Pre-meetup meeting](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/8.jpg) - -ClickHouse branded Beijing duck :) -![ClickHouse branded Beijing duck](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/9.jpg) diff --git a/website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md b/website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md deleted file mode 100644 index 0ca02aaac66..00000000000 --- a/website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: 'ClickHouse Meetup in Limassol on May 7, 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/main.jpg' -date: '2019-05-14' -tags: ['meetup', 'Cyprus', 'Limassol', 'events'] ---- - -The first open-air ClickHouse Meetup took place in the heart of Limassol, the second-largest city of Cyprus, on the roof kindly provided by Exness Group. The views were stunning, but speakers did a great job competing with them for audience attention. Over one hundred people have joined in, which once again confirms high interest in ClickHouse around the globe. Meetup content is also available as [video recording](https://www.youtube.com/watch?v=_rpU-TvSfZ8). - -![Intro](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/1.jpg) - -[Kirill Shvakov](https://github.com/kshvakov) has played the key role in making this event possible by reaching out to the ClickHouse Community at Cyprus, finding the great venue, and other speakers. 
Most of the worldwide ClickHouse Meetups happen thanks to active community members like Kirill, if you want to help us organize ClickHouse Meetup in your area, please reach the ClickHouse team via [this form](https://clickhouse.com/#meet) or any other convenient way. - -![Kirill Shvakov](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/2.jpg) - -Kirill is well known for his top-notch [ClickHouse Go Driver](https://github.com/clickhouse/clickhouse-go) running over native protocol, but his opening talk was about his experience optimizing ClickHouse queries and solving real-world tasks at Integros and Wisebits. [Slides](https://presentations.clickhouse.com/meetup22/strategies.pdf). [Full query listings](https://github.com/kshvakov/ClickHouse-Meetup-Exness). - -The event has begun in the early evening... -![Evening in Limassol](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/3.jpg) - -...but it took just around one hour for nature to turn the night mode on. It made the projected slides easier to read. 
-![Night in Limassol](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/4.jpg) - -Sergey Tomilov with his colleagues from the Exness Platform Team has shared details on the evolution of their systems for analyzing logs and metrics and how they ended up relying on ClickHouse for long-term storage ([slides](https://presentations.clickhouse.com/meetup22/exness.pdf)): -![Sergey Tomilov](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/5.jpg) - -Alexey Milovidov from the ClickHouse team has demonstrated features from recent ClickHouse releases and gave an update on what's coming soon ([slides](https://presentations.clickhouse.com/meetup22/new_features/)): -![Alexey Milovidov](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/6.jpg) - -Alexander Zaitsev, CTO of Altinity, have shown an overview of how to integrate ClickHouse into environments running on Kubernetes ([slides](https://presentations.clickhouse.com/meetup22/kubernetes.pdf)): -![Alexander Zaitsev, CTO of Altinity](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/7.jpg) - -Vladimir Goncharov, a backend engineer from Aloha Browser, has closed the ClickHouse Limassol Meetup by demonstrating few projects that allow integrating other opensource tools for logs processing with ClickHouse ([slides](https://presentations.clickhouse.com/meetup22/aloha.pdf)): -![Vladimir Goncharov](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/8.jpg) - -Unfortunately, midnight was closing in and only the most weather-proof ClickHouse fans have managed to stay the whole event as it started getting pretty chilly. 
- -![Final](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/9.jpg) - -More photos from the event are available at [short event afterword by Exness](https://www.facebook.com/events/386638262181785/permalink/402167077295570/). - diff --git a/website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md b/website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md deleted file mode 100644 index e166c2227ff..00000000000 --- a/website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: 'ClickHouse Meetup in Madrid on April 2, 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/main.jpg' -date: '2019-04-11' -tags: ['meetup', 'Spain', 'Madrid', 'events'] ---- - -Madrid ClickHouse Meetup has probably been the largest one in the EU so far with well over one hundred attendees. As usual, we've seen not only people working and living in the same city, Madrid, but also many people who have traveled a long way to join the event and talk about ClickHouse use cases and learn about new and upcoming features. 
- -Opening word by [Javi Santana](https://twitter.com/javisantana), who personally made this event possible by gathering up all the people and setting up the venue provided by Google Campus for Startups: -![Javi Santana](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/1.jpg) - -Alexander Zaitsev, CTO of Altinity, has introduced ClickHouse to those who're just starting to use it or only considering for future ([slides](https://www.slideshare.net/Altinity/clickhouse-introduction-by-alexander-zaitsev-altinity-cto)): -![Alexander Zaitsev](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/2.jpg) - -Robert Hodges, CEO of Altinity, has probably traveled the longest distance to join the event since he's based in California and he has also [published his thoughts on this event in the Altinity blog](https://www.altinity.com/blog/2019/4/9/madrid-clickhouse-meetup-summary). - -Alexey Milovidov from Yandex has shown the recent advancements in ClickHouse features and briefly walked the audience through the current roadmap ([slides](https://presentations.clickhouse.com/meetup21/new_features/)): -![Alexey Milovidov from Yandex](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/3.jpg) - -Iago Enriquez from Idealista was talking about their migration from “legacy” commercial DBMS to ClickHouse. It was the first time we've heard that someone talking about using two flagship opensource products by Yandex together in production. They are using [CatBoost](https://catboost.ai/) model inference right from ClickHouse SQL queries to fill in the incompleteness of their source data. Unfortunately, slides of Iago's talk were not allowed to be published. 
-![Iago Enriquez from Idealista](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/4.jpg) - -David Pardo Villaverde from Corunet gave a talk about how they've used ClickHouse to prepare data for dense model generation for one of their clients. It took a pretty short time on a single server. Fun quote from conclusions: “If I wasn't already married, I'd marry it! \[ClickHouse\]” ([slides](https://presentations.clickhouse.com/meetup21/predictive_models.pdf)): -![David Pardo Villaverde from Corunet](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/5.jpg) - -Closing talk of the meetup was by Murat Kabilov from Adjust Gmbh, he was demonstrating his opensource project [pg2ch](https://github.com/mkabilov/pg2ch) that allows to sync data from PostgreSQL to ClickHouse in real-time ([slides](https://presentations.clickhouse.com/meetup21/postgres_to_clickhouse.pdf)). -![Murat Kabilov from Adjust Gmbh](https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/6.jpg) diff --git a/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md b/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md deleted file mode 100644 index aa468fb6aa9..00000000000 --- a/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse Meetup in San Francisco on June 4, 2019' -image: 'https://blog-images.clickhouse.com/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019/main.jpg' -date: '2019-06-12' -tags: ['meetup','USA','San Francisco','events', 'California', 'Bay Area'] ---- - -23rd ClickHouse Meetup in San Francisco was held in CloudFlare office and co-organized by Altinity. There were about 35 attendees, most of them are experienced ClickHouse users from SF and Bay Area.
The meetup started with an introduction by Robert Hodges, Altinity CEO and continued with a lightning talk by Alan Braithwaite from Segment.com about their experience with ClickHouse. Next talk from Alexander Zaitsev about ClickHouse operator for Kubernetes gained much attention from the audience because Kubernetes is in fact very popular even for databases. At the end there was a presentation from the ClickHouse developer Alexey Milovidov about new and upcoming features with a roadmap. There was a discussion about the details of implementation and design of the most appreciated features. We were happy to meet with ClickHouse contributors at the meetup. Slides from the event are [available on GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup23). - -As we see increasing demand for ClickHouse events in SF and Bay Area, we have already started planning the next event. diff --git a/website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md b/website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md deleted file mode 100644 index 0d754a3158d..00000000000 --- a/website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: 'How to speed up LZ4 decompression in ClickHouse?' -image: 'https://blog-images.clickhouse.com/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse/main.jpg' -date: '2019-06-25' -tags: ['performance', 'lz4', 'article', 'decompression'] -author: 'Alexey Milovidov' ---- - -When you run queries in [ClickHouse](https://clickhouse.com/), you might notice that the profiler often shows the `LZ_decompress_fast` function near the top. What is going on? This question had us wondering how to choose the best compression algorithm. - -ClickHouse stores data in compressed form. When running queries, ClickHouse tries to do as little as possible, in order to conserve CPU resources. 
In many cases, all the potentially time-consuming computations are already well optimized, plus the user wrote a well thought-out query. Then all that's left to do is to perform decompression. - -[Read further](https://habr.com/en/company/yandex/blog/457612/) diff --git a/website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md b/website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md deleted file mode 100644 index c6bfadb8a4b..00000000000 --- a/website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: 'Schedule of ClickHouse Meetups in China for 2019' -image: 'https://blog-images.clickhouse.com/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019/main.jpg' -date: '2019-04-18' -tags: ['China', 'Beijing', 'Shanghai', 'Shenzhen', 'announcement', 'meetup'] ---- - -Last year there were two ClickHouse Meetups in Beijing, in [January](../2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md) and [October](../2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md), and they appeared to be in extremely high demand, with fully packed venue and thousands of people watching online. So this year we decided to try to expand meetups to other large cities in China where we see the most interest in ClickHouse based on website visits. Here's the current schedule and sign up pages: - -- [ClickHouse Community Meetup in Beijing](https://www.huodongxing.com/event/2483759276200) on June 8. -- [ClickHouse Community Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20. -- [ClickHouse Community Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27. - -到时候那里见! 
diff --git a/website/blog/en/2020/five-methods-for-database-obfuscation.md b/website/blog/en/2020/five-methods-for-database-obfuscation.md deleted file mode 100644 index 02be447ba0c..00000000000 --- a/website/blog/en/2020/five-methods-for-database-obfuscation.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: 'Five Methods For Database Obfuscation' -image: 'https://blog-images.clickhouse.com/en/2020/five-methods-for-database-obfuscation/main.jpg' -date: '2020-01-27' -tags: ['article', 'obfuscation'] -author: 'Alexey Milovidov' ---- - -ClickHouse users already know that its biggest advantage is its high-speed processing of analytical queries. But claims like this need to be confirmed with reliable performance testing. - -[Read further](https://habr.com/en/company/yandex/blog/485096/) diff --git a/website/blog/en/2020/package-repository-behind-cdn.md b/website/blog/en/2020/package-repository-behind-cdn.md deleted file mode 100644 index 77919f12266..00000000000 --- a/website/blog/en/2020/package-repository-behind-cdn.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: 'Package Repository Behind CDN' -image: 'https://blog-images.clickhouse.com/en/2020/package-repository-behind-cdn/main.jpg' -date: '2020-07-02' -tags: ['article', 'CDN', 'Cloudflare', 'repository', 'deb', 'rpm', 'tgz'] -author: 'Ivan Blinkov' ---- - -On initial open-source launch, ClickHouse packages were published at an independent repository implemented on Yandex infrastructure. We'd love to use the default repositories of Linux distributions, but, unfortunately, they have their own strict rules on third-party library usage and software compilation options. These rules happen to contradict with how ClickHouse is produced. In 2018 ClickHouse was added to [official Debian repository](https://packages.debian.org/sid/clickhouse-server) as an experiment, but it didn't get much traction. Adaptation to those rules ended up producing more like a demo version of ClickHouse with crippled performance and limited features. 
- -!!! info "TL;DR" - If you have configured your system to use `http://repo.yandex.ru/clickhouse/` for fetching ClickHouse packages, replace it with `https://repo.clickhouse.com/`. - -Distributing packages via our own repository was working totally fine until ClickHouse started getting traction in countries far from Moscow, most notably the USA and China. Downloading large files of packages from remote location was especially painful for Chinese ClickHouse users, likely due to how China is connected to the rest of the world via its famous firewall. But at least it worked (with high latencies and low throughput), while in some smaller countries there was completely no access to this repository and people living there had to host their own mirrors on neutral ground as a workaround. - -Earlier this year we made the ClickHouse official website to be served via global CDN by [Cloudflare](https://www.cloudflare.com) on a `clickhouse.com` domain. To solve the download issues discussed above, we have also configured a new location for ClickHouse packages that are also served by Cloudflare at [repo.clickhouse.com](https://repo.clickhouse.com). It used to have some quirks, but now it seems to be working fine while improving throughput and latencies in remote geographical locations by over an order of magnitude. - -## Switching To Repository Behind CDN - -This transition has some more benefits besides improving the package fetching, but let's get back to them in a minute. One of the key reasons for this post is that we can't actually influence the repository configuration of ClickHouse users. We have updated all instructions, but for people who have followed these instructions earlier, **action is required** to use the new location behind CDN. Basically, you need to replace `http://repo.yandex.ru/clickhouse/` with `https://repo.clickhouse.com/` in your package manager configuration.
- -One-liner for Ubuntu or Debian: -```bash -sudo apt-get install apt-transport-https ca-certificates && sudo perl -pi -e 's|http://repo.yandex.ru/clickhouse/|https://repo.clickhouse.com/|g' /etc/apt/sources.list.d/clickhouse.list && sudo apt-get update -``` - -One-liner for RedHat or CentOS: -```bash -sudo perl -pi -e 's|http://repo.yandex.ru/clickhouse/|https://repo.clickhouse.com/|g' /etc/yum.repos.d/clickhouse* -``` - -As you might have noticed, the domain name is not the only thing that has changed: the new URL uses `https://` protocol. Usually, it's considered less important for package repositories compared to normal websites because most package managers check [GPG signatures](https://en.wikipedia.org/wiki/GNU_Privacy_Guard) for what they download anyway. However it still has some benefits: for example, it's not so uncommon for people to download packages via browser, `curl` or `wget`, and install them manually (while for [tgz](https://repo.clickhouse.com/tgz/) builds it's the only option). Fewer opportunities for sniffing traffic can't hurt either. The downside is that `apt` in some Debian flavors has no HTTPS support by default and needs a couple more packages to be installed (`apt-transport-https` and `ca-certificates`). - -## Investigating Repository Usage - -The next important thing we obtained by using Cloudflare for our package repository is observability. Of course the same could have been implemented from scratch, but it'd require extra resources to develop and maintain, while Cloudflare provides quite rich tools for analyzing what's going on in your domains. - -!!! info "Did you know?" - It's kind of off-topic, but those Cloudflare features are internally based on ClickHouse, see their [HTTP analytics](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) and [DNS analytics](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/) blog posts. 
- -Just a few weeks ago they have also added [cache analytics](https://blog.cloudflare.com/introducing-cache-analytics/) feature, which allowed to drill into how effectively the content is cached on CDN edges and improve the CDN configuration accordingly. For example, it allowed debugging some inconsistencies in cached repository metadata. - -## Digging Deeper - -All those built-in observability tools provided by Cloudflare share one weak point: they are purely technical and generic, without any domain-specific awareness. They excel at debugging low-level issues, but it's hard to get a higher-level picture based on them. With our package repository scenario, we're not so interested in frequent metadata update requests, but we'd like to see reports on package downloads by version, kind, and so on. We definitely didn't want to operate a separate infrastructure to get those reports, but given there was no out-of-the-box solution, we had to be creative and managed to find a cool middle ground. - -Ever heard the [“serverless computing”](https://en.wikipedia.org/wiki/Serverless_computing) hype recently? That was the basic idea: let's assemble a bunch of serverless or managed services to get what we want, without any dedicated servers. The plan was pretty straightforward: - -1. Dump details about package downloads to a ClickHouse database. -2. Connect some [BI](https://en.wikipedia.org/wiki/Business_intelligence) tool to that ClickHouse database and configure required charts/dashboards. - -Implementing it required a little bit of research, but the overall solution appeared to be quite elegant: - -1. For a ClickHouse database, it was a no-brainer to use [Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse). With a few clicks in the admin interface, we got a running ClickHouse cluster with properly configured high-availability and automated backups. Ad-hoc SQL queries could be run from that same admin interface. -2. 
Cloudflare allows customers to run custom code on CDN edge servers in a serverless fashion (so-called [workers](https://workers.cloudflare.com)). Those workers are executed in a tight sandbox which doesn't allow for anything complicated, but this feature fits perfectly to gather some data about download events and send it somewhere else. This is normally a paid feature, but special thanks to Connor Peshek from Cloudflare who arranged a lot of extra features for free on `clickhouse.com` when we have applied to their [open-source support program](https://developers.cloudflare.com/sponsorships/). -3. To avoid publicly exposing yet another ClickHouse instance (like we did with **[playground](/docs/en/getting-started/playground/)** regardless of being a 100% anti-pattern), the download event data is sent to [Yandex Cloud Functions](https://cloud.yandex.com/services/functions). It's a generic serverless computing framework at Yandex Cloud, which also allows running custom code without maintaining any servers, but with less strict sandbox limitations and direct access to other cloud services like Managed ClickHouse that was needed for this task. -4. It didn't require much effort to choose a visualization tool either, as [DataLens BI](https://cloud.yandex.com/docs/datalens/) is tightly integrated with ClickHouse, capable to build what's required right from the UI, and satisfies the “no servers” requirement because it's a SaaS solution. Public access option for charts and dashboards have also appeared to be handy. - -There's not so much data collected yet, but here's a live example of how the resulting data visualization looks like. 
For example, here we can see that LTS releases of ClickHouse are not so popular yet *(yes, we have [LTS releases](https://clickhouse.com/docs/en/faq/operations/production/)!)*: -![iframe](https://datalens.yandex/qk01mwxkgiysm?_embedded=1) - -While here we confirmed that `rpm` is at least as popular as `deb`: -![iframe](https://datalens.yandex/lfvldsf92i2uh?_embedded=1) - -Or you can take a look at all key charts for `repo.clickhouse.com` together on a handy **[dashboard](https://datalens.yandex/pjzq4rot3t2ql)** with a filtering possibility. - -## Lessons Learned - -* CDN is a must-have if you want people from all over the world to download some artifacts that you produce. Beware the huge pay-for-traffic bills from most CDN providers though. -* Generic technical system metrics and drill-downs are a good starting point, but not always enough. -* Serverless is not a myth. Nowadays it is indeed possible to build useful products by just integrating various infrastructure services together, without any dedicated servers to take care of. diff --git a/website/blog/en/2020/pixel-benchmark.md b/website/blog/en/2020/pixel-benchmark.md deleted file mode 100644 index 632a56d5bc6..00000000000 --- a/website/blog/en/2020/pixel-benchmark.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: 'Running ClickHouse on an Android phone' -image: 'https://blog-images.clickhouse.com/en/2020/pixel-benchmark/main.jpg' -date: '2020-07-16' -author: 'Alexander Kuzmenkov' -tags: ['Android', 'benchmark', 'experiment'] ---- - - -This is a brief description of my experiments with building ClickHouse on Android. If this is your first time hearing about ClickHouse, it is a surprisingly fast columnar SQL DBMS for real-time reporting. It's normally used in AdTech and the like, deployed on clusters of hundreds of machines, holding up to petabytes of data.
But ClickHouse is straightforward to use on a smaller scale as well — your laptop will do, and don't be surprised if you are able to process several gigabytes of data per second on this hardware. There is another kind of small-scale, though pretty powerful, platform that is ubiquitous now — smartphones. The conclusion inevitably follows: you must be able to run ClickHouse on your smartphone as well. It's also that I can't help but chuckle at the idea of setting up a high performance mobile OLAP cluster using a dozen phones. Or also at the idea of seeing the nostalgic `Segmentation fault (core dumped)` on the lovely OLED screen, but I digress. Let's get it going. - -## First cheap attempt - -I heard somewhere that Android uses the Linux kernel, and I can already run familiar UNIX-like shell and tools using [Termux](https://termux.com/). And ClickHouse already supports ARM platform and even publishes a binary built for 64-bit ARM. This binary also doesn't have a lot of dependencies — only a pretty old version of `glibc`. Maybe I can just download a ClickHouse binary from CI to the phone and run it? - -Turns out it's not that simple. - -* The first thing we'll see after trying to run is an absurd error message: `./clickhouse: file is not found`. But it's right there! `strace` helps: what cannot be found is `/lib64/ld-linux-x86-64.so.2`, a linker specified in the ClickHouse binary. The linker, in this case, is a system program that initially loads the application binary and its dependencies before passing control to the application. Android uses a different linker located by another path, this is why we get the error. This problem can be overcome if we call the linker explicitly, e.g. `/system/bin/linker64 $(readlink -f ./clickhouse)`. - -* Immediately we encounter another problem: the linker complains that the binary has a wrong type `ET_EXEC`. What does this mean?
Android binaries must support dynamic relocation, so that they can be loaded at any address, probably for ASLR purposes. ClickHouse binaries do not normally use position-independent code, because we have measured that it gives a small performance penalty of about 1%. After tweaking compilation and linking flags to include `-fPIC` as much as possible, and battling some really weird linker errors, we finally arrive at a relocatable binary that has a correct type `ET_DYN`. - -* But it only gets worse. Now it complains about TLS section offset being wrong. After reading some mail archives where I could barely understand a word, I concluded that Android uses some different layout of memory for the section of the executable that holds thread-local variables, and `clang` from Android toolchain is patched to account for this. After that, I had to accept I won't be able to use familiar tools, and reluctantly turned to the Android toolchain. - -## Using the Android toolchain - -Surprisingly, it's rather simple to set up. Our build system uses CMake and already supports cross-compilation — we have CI configurations that cross-compile for Mac, AArch64 Linux and FreeBSD. Android NDK also has integration with CMake and a [manual](https://developer.android.com/ndk/guides/cmake) on how to set it up. Download the Android NDK, add some flags to your `cmake` invocation: `-DCMAKE_TOOLCHAIN_FILE=~/android-ndk-r21d/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=28`, and you're done. It (almost) builds. What obstacles do we have this time? - -* Our `glibc` compatibility layer has a lot of compilation errors. It borrows `musl` code to provide functions that are absent from older versions of `glibc`, so that we can run the same binary on a wide range of distros. Being heavily dependent on system headers, it runs into all kinds of differences between Linux and Android, such as the limited scope of `pthread` support or just subtly different API variants.
Thankfully we're building for a particular version of Android, so we can just disable this and use all needed functions straight from the system `libc`. -* Some third-party libraries and our CMake files are broken in various unimaginative ways. Just disable everything we can and fix everything we can't. -* Some of our code uses `#if defined(__linux__)` to check for Linux platform. This doesn't always work, because Android also exports `__linux__` but there are some API differences. -* `std::filesystem` is still not fully supported in NDK r21. The support went into r22 that is scheduled for Q3 2020, but I want it right now... Good that we bundle our own forks of `libcxx` and `libcxxabi` to reduce dependencies, and they are fresh enough to fully support C++20. After enabling them, everything works. -* Weird twenty-screens errors in `std::map` or something like that, that are also resolved by using our `libcxx`. - -## On the device - -At last, we have a binary we can actually run. Copy it to the phone, `chmod +x`, `./clickhouse server --config-path db/config.xml`, run some queries, it works! - - - -Feels so good to see my favorite message. - -It's a full-fledged development environment here in Termux, let's install `gdb` and attach it to see where the segfault happens. Run `gdb clickhouse --ex run '--config-path ....'`, wait for it to launch for a minute, only to see how Android kills Termux because it is out of memory. Are 4 GB of RAM not enough, after all? Looking at the `clickhouse` binary, its size is a whopping 1.1 GB. The major part of the bloat is due to the fact that some of our computational code is heavily specialized for particular data types (mostly via C++ templates), and also the fact that we build and link a lot of third-party libraries statically. A non-essential part of the binary is debug symbols, which help to produce good stack traces in error messages.
We can remove them with `strip -s ./clickhouse` right here on the phone, and after that, the size becomes more manageable, about 400 MB. Finally we can run `gdb` and see that the segfault is somewhere in `unw_backtrace`: - -``` -Thread 60 "ConfigReloader" received signal SIGSEGV, Segmentation fault. -[Switching to LWP 21873] -0x000000556a73f740 in ?? () - -(gdb) whe 20 -#0 0x000000556a73f740 in ?? () -#1 0x000000556a744028 in ?? () -#2 0x000000556a73e5a0 in ?? () -#3 0x000000556a73d250 in unw_init_local () -#4 0x000000556a73deb8 in unw_backtrace () -#5 0x0000005562aabb54 in StackTrace::tryCapture() () -#6 0x0000005562aabb10 in StackTrace::StackTrace() () -#7 0x0000005562a8d73c in MemoryTracker::alloc(long) () -#8 0x0000005562a8db38 in MemoryTracker::alloc(long) () -#9 0x0000005562a8e8bc in CurrentMemoryTracker::alloc(long) () -#10 0x0000005562a8b88c in operator new[](unsigned long) () -#11 0x0000005569c35f08 in Poco::XML::NamePool::NamePool(unsigned long) () -... -``` - -What is this function, and why do we need it? In this particular stack trace, we're out of memory, and about to throw an exception for that. `unw_backtrace` is called to produce a backtrace for the exception message. But there is another interesting context where we call it. Believe it or not, ClickHouse has a built-in `perf`-like sampling profiler that can save stack traces for CPU time and real time, and also memory allocations. The data is saved into a `system.trace_log` table, so you can build flame graphs for what your query was doing as simple as piping output of an SQL query into `flamegraph.pl`. This is an interesting feature, but what is relevant now is that it sends signals to all threads of the server to interrupt them at some random time and save their current backtraces, using the same `unw_backtrace` function that we know to segfault. We expect query profiler to be used in production environment, so it is enabled by default. 
After disabling it, we have a functioning ClickHouse server running on Android. - -## Is your phone good enough? - -There is a beaten genre of using data sets and queries of a varying degree of syntheticity to prove that a particular DBMS you work on has performance superior to other, less advanced, DBMSes. We've moved past that, and instead use the DBMS we love as a benchmark of hardware. For this benchmark we use a small 100M rows obfuscated data set from Yandex.Metrica, about 12 GB compressed, and some queries representative of Metrica dashboards. There is [this page](/benchmark/hardware/) with crowdsourced results for various cloud and traditional servers and even some laptops, but how do the phones compare? Let's find out. Following [the manual](/docs/en/operations/performance-test/) to download the necessary data to the phone and run the benchmark was pretty straightforward. One problem was that some queries can't run because they use too much memory and the server gets killed by Android, so I had to script around that. Also, I'm not sure how to reset a file system cache on Android, so the 'cold run' data is not correct. The results look pretty good: - - - -My phone is Google Pixel 3a, and it is only 5 times slower on average than my Dell XPS 15 work laptop. The queries where the data doesn't fit into memory and has to go to disk (the flash, I mean) are noticeably slower, up to 20 times, but mostly they don't complete because the server gets killed — it only has about 3 GB of memory available. Overall I think the results look pretty good for the phone. High-end models should be even more performant, reaching performance comparable to some smaller laptops. - -## Conclusion - -This was a rather enjoyable exercise. Running a server on your phone is a nice way to give a demo, so we should probably publish a Termux package for ClickHouse. 
For this, we have to debug and fix the `unw_backtrace` segfault (I have my fingers crossed that it will be gone after adding `-fno-omit-frame-pointer`), and also fix some quirks that are just commented out for now. Most of the changes required for the Android build are already merged into our master branch. - -Building for Android turned out to be relatively simple — all these experiments and writing took me about four days, and it was the first time I ever did any Android-related programming. The NDK was simple to use, and our code was cross-platform enough so I only had to make minor modifications. If we didn't routinely build for AArch64 and had a hard dependency on SSE 4.2 or something, it would have been a different story. - -But the most important takeout is that now you don't have to obsess over choosing a new phone — just benchmark it with ClickHouse. - - -_2020-07-16 [Alexander Kuzmenkov](https://github.com/akuzm)_ diff --git a/website/blog/en/2020/the-clickhouse-community.md b/website/blog/en/2020/the-clickhouse-community.md deleted file mode 100644 index 3e5c614430f..00000000000 --- a/website/blog/en/2020/the-clickhouse-community.md +++ /dev/null @@ -1,138 +0,0 @@ ---- -title: 'The ClickHouse Community' -image: 'https://blog-images.clickhouse.com/en/2020/the-clickhouse-community/clickhouse-community-history.png' -date: '2020-12-10' -author: 'Robert Hodges' -tags: ['community', 'open source', 'telegram', 'meetup'] ---- - -One of the great “features” of ClickHouse is a friendly and welcoming community. In this article we would like to outline how the ClickHouse community arose, what it is today, and how you can get involved. There is a role for everyone, from end users to contributors to corporate friends. Our goal is to make the community welcoming to every person who wants to join. - -But first, let’s review a bit of history, starting with how ClickHouse first developed at [Yandex](https://yandex.com/company/). 
- -## Origins at Yandex - -ClickHouse began as a solution for web analytics in [Yandex Metrica](https://metrica.yandex.com/about?). Metrica is a popular service for analyzing website traffic that is now #2 in the market behind Google Analytics. In 2008 [Alexey Milovidov](https://github.com/alexey-milovidov), an engineer on the Metrica team, was looking for a database that could create reports on metrics like number of page views per day, unique visitors, and bounce rate, without aggregating the data in advance. The idea was to provide a wide range of metric data and let users ask any question about them. - -This is a classic problem for data warehouses. However, Alexey could not find one that met Yandex requirements, specifically large datasets, linear scaling, high efficiency, and compatibility with SQL tools. In a nutshell: like MySQL but for analytic applications. So Alexey wrote one. It started as a prototype to do GROUP BY operations. - -The prototype evolved into a full solution with a name, ClickHouse, short for “Clickstream Data Warehouse”. Alexey added additional features including SQL support and the MergeTree engine. The SQL dialect was superficially similar to MySQL, [which was also used in Metrica](/blog/en/2016/evolution-of-data-structures-in-yandex-metrica/) but could not handle query workloads without complex pre-aggregation. By 2011 ClickHouse was in production for Metrica. - -Over the next 5 years Alexey and a growing team of developers extended ClickHouse to cover new use cases. By 2016 ClickHouse was a core Metrica backend service. It was also becoming entrenched as a data warehouse within Yandex, extending to use cases like service monitoring, network flow logs, and event management. ClickHouse had evolved from the original one-person project to business critical software with a full team of a dozen engineers led by Alexey. - -By 2016, ClickHouse had an 8 year history and was ready to become a major open source project. 
Here’s a timeline that tracks major developments as a time series. - - - -## ClickHouse goes open source - -Yandex open sourced ClickHouse under an Apache 2.0 license in 2016. There were numerous reasons for this step. - -* Promote adoption within Yandex by making it easier for internal departments to get builds. -* Ensure that ClickHouse would continue to evolve by creating a community to nurture it. -* Motivate developers to contribute to and use ClickHouse due to the open source “cool” factor. -* Improve ClickHouse quality by making the code public. Nobody wants their name visible on bad code. ;-) -* Showcase Yandex innovation to a worldwide audience. - -Alexey and the development team moved ClickHouse code to a Github repo under the Yandex organization and began issuing community builds as well as accepting external contributions. They simultaneously began regular meetups to popularize ClickHouse and build a community around it. The result was a burst of adoption across multiple regions of the globe. - -ClickHouse quickly picked up steam in Eastern Europe. The first ClickHouse meetups started in 2016 and have grown to include 200 participants for in-person meetings and up to 400 for online meetings. ClickHouse is now widely used in start-ups in Russia as well as other Eastern European countries. Developers located in Eastern Europe continue to supply more contributions to ClickHouse than any other region. - -ClickHouse also started to gain recognition in the US and Western Europe. [CloudFlare](https://www.cloudflare.com/) published a widely read blog article about [their success using ClickHouse for DNS analytics](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/). - -ClickHouse also took off in China. The first meetup in China took place in 2018 and attracted enormous interest. In-person meetups included over 400 participants. Online meetings have reached up to 1000 online viewers. 
- -In 2019 a further step occurred as ClickHouse moved out from under the Yandex Github organization into a separate [ClickHouse organization](https://github.com/ClickHouse). The new organization includes ClickHouse server code plus core ecosystem projects like the cpp and ODBC drivers. - -ClickHouse community events shifted online following world-wide disruptions due to COVID-19, but growth in usage continued. One interesting development has been the increasing number of startups using ClickHouse as a backend. Many of these are listed on the [ClickHouse Adopters](/docs/en/introduction/adopters/) page. Also, additional prominent companies like eBay, Uber, and Flipkart went public in 2020 with stories of successful ClickHouse usage. - -## The ClickHouse community today - -As of 2020 the ClickHouse community includes developers and users from virtually every region of the globe. Yandex engineers continue to supply a majority of pull requests to ClickHouse itself. Altinity follows in second place with contributions to ClickHouse core and ecosystem projects. There is also substantial in-house development on ClickHouse (e.g. on private forks) within Chinese internet providers. - -The real success, however, has been the huge number of commits to ClickHouse core from people in outside organizations. The following list shows the main outside contributors: - -* Azat Khuzhin -* Amos Bird -* Winter Zhang -* Denny Crane -* Danila Kutenin -* Hczhcz -* Marek Vavruša -* Guillaume Tassery -* Sundy Li -* Mikhail Shiryaev -* Nicolae Vartolomei -* Igor Hatarist -* Andrew Onyshchuk -* BohuTANG -* Yu Zhi Chang -* Kirill Shvakov -* Alexander Krasheninnikov -* Simon Podlipsky -* Silviu Caragea -* Flynn ucasFL -* [And over 550 more...](https://github.com/ClickHouse/ClickHouse/graphs/contributors) - -ClickHouse ecosystem projects are also growing rapidly. Here is a selected list of active Github projects that help enable ClickHouse applications, sorted by number of stars. 
- -* [sqlpad/sqlpad](https://github.com/sqlpad/sqlpad) — Web-based SQL editor that supports ClickHouse -* [mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) — Predictive AI layer for databases with ClickHouse support -* [x-ream/sqli](https://github.com/x-ream/sqli) — ORM SQL interface -* [tricksterproxy/trickster](https://github.com/tricksterproxy/trickster) — HTTP reverse proxy cache and time series dashboard accelerator -* [ClickHouse/clickhouse-go](https://github.com/ClickHouse/clickhouse-go) — Golang driver for ClickHouse -* [gohouse/gorose](https://github.com/gohouse/gorose) — A mini database ORM for Golang -* [ClickHouse/clickhouse-jdbc](https://github.com/ClickHouse/clickhouse-jdbc) — JDBC driver for ClickHouse -* [brokercap/Bifrost](https://github.com/brokercap/Bifrost) — Middleware to sync MySQL binlog to ClickHouse -* [mymarilyn/clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) — ClickHouse Python driver with native interface support -* [Vertamedia/clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana) — Grafana datasource for ClickHouse -* [smi2/phpClickHouse](https://github.com/smi2/phpClickHouse) — PHP ClickHouse client -* [AlexAkulov/clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) — ClickHouse backup and restore using cloud storage -* [And almost 1200 more...](https://github.com/search?o=desc&p=1&q=clickhouse&s=stars&type=Repositories) - -## Resources - -With the community growth numerous resources are available to users. At the center is the [ClickHouse org on Github](https://github.com/ClickHouse), which hosts [ClickHouse server code](https://github.com/ClickHouse/ClickHouse). ClickHouse server documentation is available at the [clickhouse.com](/) website. It has [installation instructions](/docs/en/getting-started/install/) and links to ClickHouse community builds for major Linux distributions as well as Mac, FreeBSD, and Docker. 
- -In addition, ClickHouse users have a wide range of ways to engage with the community and get help on applications. These include both chat applications as well as meetups. Here are some links to get started. - -* Yandex Meetups — Yandex has regular in-person and online international and Russian-language meetups. Video recordings and online translations are available at the official [YouTube channel](https://www.youtube.com/c/ClickHouseDB/videos). Watch for announcements on the [clickhouse.com](/) site and [Telegram](https://t.me/clickhouse_ru). -* [SF Bay Area ClickHouse Meetup](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/) — The largest US ClickHouse meetup, with meetings approximately every 2 months. -* Chinese meetups occur at regular intervals with different sponsors. Watch for announcements on clickhouse.com. -* Telegram - By far the largest forum for ClickHouse. It is the best place to talk to ClickHouse devs. There are two groups. -* [ClickHouse не тормозит](https://t.me/clickhouse_ru) (“ClickHouse does not slow down”) - Russian language Telegram group with 4,629 members currently. -* [ClickHouse](https://t.me/clickhouse_en) — English language group with 1,286 members. -* [ClickHouse Community Slack Channel](http://clickhousedb.slack.com) — Public channel for Slack users. It currently has 551 members. -* [ClickHouse.com.cn](http://clickhouse.com.cn/) — Chinese language site for ClickHouse-related announcements and questions. -* [Conference Presentations](https://github.com/ClickHouse/clickhouse-presentations) — ClickHouse developers like to talk and do so whenever they can. Many recent presentations are stored in Github. Also, look for ClickHouse presentations at Linux Foundation conferences, Data Con LA, Percona Live, and many other venues where there are presentations about data. 
-* Technical webinars — Altinity has a large library of technical presentations on ClickHouse and related applications on the [Altinity Youtube channel](https://www.youtube.com/channel/UCE3Y2lDKl_ZfjaCrh62onYA/featured). - -If you know of additional resources please bring them to our attention. - -## How you can contribute to ClickHouse - -We welcome users to join the ClickHouse community in every capacity. There are four main ways to participate. - -### Use ClickHouse and share your experiences - -Start with the documentation. Download ClickHouse and try it out. Join the chat channels. If you encounter bugs, [log issues](https://github.com/ClickHouse/ClickHouse/issues) so we can get them fixed. Also, it’s easy to make contributions to the documentation if you have basic Github and markdown skills. Press the pencil icon on any page of the clickhouse.com website to edit pages and automatically generate pull requests to merge your changes. - -If your company has deployed ClickHouse and is comfortable talking about it, please don't be shy. Add it to the [ClickHouse Adopters](/docs/en/introduction/adopters/) page so that others can learn from your experience. - -### Become a ClickHouse developer - -Write code to make ClickHouse better. Here are your choices. - -* ClickHouse server — Start with the [“For Beginners” documentation](/docs/en/development/developer-instruction/) to learn how to build ClickHouse and submit PRs. Check out the current ClickHouse issues if you are looking for work. PRs that follow the development standards will be merged faster. - -* Ecosystem projects — Most projects in the ClickHouse ecosystem accept PRs. Check with each project for specific practices. - -ClickHouse is also a great target for research problems. Over the years many dozens of university CS students have worked on ClickHouse features. 
Alexey Milovidov maintains an especially rich set of [project suggestions for students](https://github.com/ClickHouse/ClickHouse/issues/15065). Join Telegram and ask for help if you are interested. Both Yandex and Altinity also offer internships. - -## Where we go from here - -ClickHouse has grown enormously from its origins as a basic prototype in 2008 to the popular SQL data warehouse users see today. Our community is the rock that will enable ClickHouse to become the default data warehouse worldwide. We are working together to create an inclusive environment where everyone feels welcome and has an opportunity to contribute. We welcome you to join! - -This article was written with kind assistance from Alexey Milovidov, Ivan Blinkov, and Alexander Zaitsev. - -_2020-12-11 [Robert Hodges](https://github.com/hodgesrm)_ diff --git a/website/blog/en/2021/clickhouse-inc.md b/website/blog/en/2021/clickhouse-inc.md deleted file mode 100644 index 1f420ef1dba..00000000000 --- a/website/blog/en/2021/clickhouse-inc.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: 'Introducing ClickHouse, Inc.' -image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-inc/home.png' -date: '2021-09-20' -author: 'Alexey Milovidov' -tags: ['company', 'incorporation', 'yandex', 'community'] ---- - -Today I’m happy to announce **ClickHouse Inc.**, the new home of ClickHouse. The development team has moved from Yandex and joined ClickHouse Inc. to continue building the fastest (and the greatest) analytical database management system. The company has received nearly $50M in Series A funding led by Index Ventures and Benchmark with participation by Yandex N.V. and others. I created ClickHouse, Inc. with two co-founders, [Yury Izrailevsky](https://www.linkedin.com/in/yuryizrailevsky/) and [Aaron Katz](https://www.linkedin.com/in/aaron-katz-5762094/). I will continue to lead the development of ClickHouse as Chief Technology Officer (CTO), Yury will run product and engineering, and Aaron will be CEO. 
- -## History of ClickHouse - -I started developing ClickHouse more than ten years ago, and it has never been an easy ride. The idea of ClickHouse came up while I was working in Yandex as a developer of a real-time web analytics system. My team and I faced multiple data processing challenges that often required custom data structures and sophisticated algorithms, creative solutions and tradeoffs, deep understanding of domain area, hardware, and math. All these years, I often went to bed with endless thoughts about how we could solve yet another data processing challenge. I love data and processing in extreme constraints, where you have to think about bytes and nanoseconds to save petabytes and seconds. The ClickHouse team shares this passion: in my opinion, this is the main reason for ClickHouse’s success. - -In 2009 we started ClickHouse as an experimental project to check the hypothesis if it's viable to generate analytical reports in real-time from non-aggregated data that is also constantly added in real-time. It took three years to prove this hypothesis, and in 2012 ClickHouse launched in production for the first time. Unlike custom data structures used before, ClickHouse was applicable more generally to work as a database management system. After several years I found that most departments in my company were using ClickHouse, and it made me wonder: maybe ClickHouse is too good to run only inside Yandex? Then we released it in [open source](https://github.com/ClickHouse/ClickHouse) in 2016. - -## ClickHouse in Open Source - -Making ClickHouse open source was also not an easy decision, but now I see: doing open source is hard, but it is a big win. While it takes a tremendous effort and responsibility to maintain a popular open-source product, for us, the benefits outweigh all the costs. 
Since we published ClickHouse, it has been deployed in production in thousands of companies across the globe for a wide range of use cases, from agriculture to self-driving cars. In 2019 we spent over a third of our time abroad organizing various ClickHouse events and speaking at external conferences, and we’re thrilled to see you all again in person once travel restrictions become less severe. The feedback and contributions from our community are priceless, and we improve the quality of implementation, the feature completeness, and making product decisions with the help of our community. One of our main focuses is to make ClickHouse welcoming for contributors by making the source code easy to read and understand, with the processes easy to follow. For me, ClickHouse is a showcase so everyone can learn the ideas in data processing. - -I like to present ClickHouse as the answer to many questions in software engineering. What is better: vectorization or JIT compilation? Look at ClickHouse; it is using both. How to write the code in modern C++ in a safe way? Ok, look at the testing infrastructure in ClickHouse. How to optimize the memcpy function? What is the fastest way to transform a Unix timestamp to date in a custom timezone? I can do multiple-hour talks about these topics, and thanks to the open-source, everyone can read the code, run ClickHouse and validate our claims. - -## Technical Advantage - -The most notable advantage of ClickHouse is its extremely high query processing speed and data storage efficiency. What is unique about ClickHouse performance? It is difficult to answer because there is no single "[silver bullet](https://www.youtube.com/watch?v=ZOZQCQEtrz8)". The main advantage is attention to details of the most extreme production workloads. We develop ClickHouse from practical needs. 
It has been created to solve the needs of Metrica, one of the [most widespread](https://w3techs.com/technologies/overview/traffic_analysis) web analytics services in the world. So ClickHouse is capable of processing 100+ PBs of data with more than 100 billion records inserted every day. One of the early adopters, Cloudflare, uses ClickHouse to process a large portion of all HTTP traffic on the internet with 10+ million records per second. As ClickHouse developers, we don’t consider the task solved if there is room for performance improvement. - -Query processing performance is not only about speed. It opens new possibilities. In previous generation data warehouses, you cannot run interactive queries without pre-aggregation; or you cannot insert new data in real time while serving interactive queries; or you cannot just store all your data. With ClickHouse, you can keep all records as long as you need and make interactive real-time reporting across the data. Before using ClickHouse, it was difficult to imagine that analytical data processing could be so easy and efficient: there is no need for a dozen pre-aggregating and tiering services (e.g. Druid), no need to place huge data volumes in RAM (e.g. Elastic), and no need to maintain daily/hourly/minutely tables (e.g. Hadoop, Spark). - -Most other database management systems don’t even permit benchmarks (through the infamous "DeWitt clause"). But we don’t fear benchmarks; we [collect them](https://github.com/ClickHouse/ClickHouse/issues/22398). ClickHouse documentation has [links](/docs/en/getting-started/example-datasets/) to publicly available datasets up to multiple terabytes in size from various domain areas. We encourage you to try ClickHouse, do some experiments on your workload, and find ClickHouse faster than others. And if not, we encourage you to publish the benchmark, and we will make ClickHouse better! - -Lastly, ClickHouse was purpose-built from the beginning to: - -— Be easy to install and use. 
It runs everywhere, from your laptop to the cloud -— Be highly reliable and scale both vertically and horizontally -— Provide SQL with many practical and convenient extensions -— Integrate with foreign data sources and streams - -## ClickHouse Spinout From Yandex - -Yandex N.V. is the largest internet company in Europe and employs over 14,000 people. They develop search, advertisement, and e-commerce services, ride tech and food tech solutions, self-driving cars... and also ClickHouse with a team of 15 engineers. It is hard to believe that we have managed to build a world-class leading analytical DBMS with such a small team while leveraging the global community. While this was barely enough to keep up with the development of the open-source product, everyone understands that the potential of ClickHouse technology highly outgrows such a small team. - -We decided to unite the resources: take the team of core ClickHouse developers, bring in a world-class business team led by [Aaron Katz](https://www.linkedin.com/in/aaron-katz-5762094/) and a cloud engineering team led by [Yury Izrailevsky](https://www.linkedin.com/in/yuryizrailevsky/), keep the power of open source, add the investment from the leading VC funds, and make an international company 100% focused on ClickHouse. I’m thrilled to announce ClickHouse, Inc. - -## What’s Next? - -Companies love ClickHouse because it gives tremendous improvements in data processing efficiency. But it is mostly about the core technology, the database server itself. We want to make ClickHouse suitable for all kinds of companies and enterprises, not just tech-savvy internet companies who are fine with managing their clusters. We want to lower the learning curve, make ClickHouse compliant with enterprise standards, make ClickHouse service to be instantly available in the cloud in a serverless way, make auto-scaling easy, and much more. - -Our mission is to make ClickHouse the first choice of analytical database management systems. 
Whenever you think about data analytics, ClickHouse should be the obvious preferred solution. I see how many companies already benefit from ClickHouse and I'm very eager to make it even more widespread and universally accepted across the world. Now we have the best engineers and the best entrepreneurs together and we are ready for the mission. - - -_2021-09-20, [Alexey Milovidov](https://github.com/alexey-milovidov)_ diff --git a/website/blog/en/2021/clickhouse-october-moscow-meetup.md b/website/blog/en/2021/clickhouse-october-moscow-meetup.md deleted file mode 100644 index fb77a5912e5..00000000000 --- a/website/blog/en/2021/clickhouse-october-moscow-meetup.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: 'ClickHouse Moscow Meetup October 19, 2021' -image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-october-moscow-meetup/featured.jpg' -date: '2021-11-11' -author: 'Rich Raposa' -tags: ['company', 'community'] ---- - -ClickHouse organized an online Meetup on October 19, 2021, hosted by our very own co-founder and CTO, Alexey Milovidov. There are a lot of new features to discuss in the 21.10 version of ClickHouse, along with many more new features coming up on the roadmap. - -There were over 200 attendees in person for the Meetup and 3,853 viewers online, and we want to thank everyone who attended live. You can watch the recording of the Meetup on YouTube [here](https://www.youtube.com/watch?v=W6h3_xykd2Y). - -Alexey Milovidov, Chief Technology Officer, welcomed and updated the community on ClickHouse Inc.'s latest news. Maksim Kita, Sr. Software Engineer at ClickHouse, started with a discussion on the new User Defined Functions (UDFs) available in 21.10. UDFs can be defined as lambda expressions using the CREATE FUNCTION command. 
For example: - -``` -CREATE FUNCTION a_plus_b AS (a, b) -> a + b -``` - -In addition to UDFs, there are two new table engines - Executable and ExecutablePool - that can stream records via stdin and stdout through custom scripts written in whatever language you prefer. For details, be sure to check out our [new training lesson on What's New in ClickHouse 21.10](https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/). - -You can now encrypt your data stored on S3, HDFS, external disks, or on a local disk. ClickHouse developers Vitaly Baranov and Artur Filatenkov discussed the details and benefits of encrypting your data at rest in ClickHouse. Vitaly presented the new full disk encryption feature and Arthur presented column-level encryption. - -![Disk Encryption Performance](https://blog-images.clickhouse.com/en/2021/clickhouse-october-moscow-meetup/disk-encryption-performance.jpg) - -![Arthur Filatenkov](https://blog-images.clickhouse.com/en/2021/clickhouse-october-moscow-meetup/arthur-filatenkov.jpg) - -Alexey then spent 40 minutes discussing some of the amazing new features on the ClickHouse roadmap, including: - -* ClickHouse Keeper: a new C++ coordination system for ClickHouse designed as an alternative to ZooKeeper -* Support for working with semi-structured data, including JSON objects with arbitrary nested objects -* Asynchronous insert mode - now you can insert data without batching! - -After the talk, Alexey took questions from users on: - -* How to parse User-Agent in ClickHouse -* Is it true that ClickHouse developers have a ClickHouse tattoo - -![YAML Configuration](https://blog-images.clickhouse.com/en/2021/clickhouse-october-moscow-meetup/yaml-configuration.jpg) - -* If you are excited about ClickHouse, be sure to join us on [Telegram](https://t.me/clickhouse_en) -* We also have a community Slack workspace; be sure to join [here](https://clickhousedb.slack.com/). 
-* If you are new to ClickHouse and want to see it in action, check out our [Getting Started lesson](https://clickhouse.com/learn/lessons/gettingstarted/). diff --git a/website/blog/en/2021/clickhouse-raises-250m-series-b.md b/website/blog/en/2021/clickhouse-raises-250m-series-b.md deleted file mode 100644 index 3f6fd9d3928..00000000000 --- a/website/blog/en/2021/clickhouse-raises-250m-series-b.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: 'ClickHouse raises a $250M Series B at a $2B valuation...and we are hiring' -image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-raises-250m-series-b/featured.jpg' -date: '2021-10-28' -author: 'Dorota Szeremeta' -tags: ['company', 'investment'] ---- - -Today, we are excited to announce that ClickHouse has raised $250 million in Series B funding at a $2B valuation. The round was led by Coatue and Altimeter, with participation from existing investors Index, Benchmark, Yandex, Almaz, and Lead Edge alongside new investors Lightspeed, Redpoint, and FirstMark. We are thrilled to partner with such an accomplished group. Not only is it a testament to the disruptive potential of our technology and our momentum to date, but more importantly, it’s an aggressive investment in our future. This funding will enable ClickHouse to grow our team, scale the product, and continue building a world-class, commercial-grade cloud solution that’s secure, compliant, and convenient for any customer to use. (You can read more about the funding round in our press release [here](https://www.businesswire.com/news/home/20211028005287/en)). - -For the past five years, ClickHouse thrived as a popular open-source product— with a dedicated user community. ClickHouse thrived for a simple reason—we deliver high query processing speed and data storage efficiency that is unmatched. ClickHouse is capable of processing 100+ PBs of data with more than 100 billion records inserted every day. 
It is 100-1000X faster than traditional approaches and this speed and efficiency has attracted customers that are household names. The commercial potential for the product was clear - and in September 2021, we decided to combine resources and officially incorporate as a company. We took the team of core ClickHouse developers led by ClickHouse creator [Alexey Milovidov](https://www.linkedin.com/in/alexey-milovidov-7b658135/) and brought in a world-class business team led by [Aaron Katz](https://www.linkedin.com/in/aaron-katz-5762094/) and a cloud engineering team led by [Yury Izrailevsky](https://www.linkedin.com/in/yuryizrailevsky/). - -Fueled by this large investment, we are hiring. We plan to double our team this year, and double it again next year. We are calling on all visionaries, builders, and contributors to join us as we build ClickHouse into a transformative, paradigm-shifting company. Our mission is to become the first choice of analytical database management systems. We are relentlessly focused on building an enterprise-grade, highly secure, and fault tolerant, multi-tenant service in the cloud that is accessible to organizations across sizes and sectors. - -If this sounds like something you want to be a part of, now’s the time to join us. To learn more about the positions we are hiring for, and what we are looking for in new hires, visit [clickhouse.com/careers](/careers/). 
diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md deleted file mode 100644 index 145f23ff129..00000000000 --- a/website/blog/en/2021/clickhouse-v21.10-released.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: 'ClickHouse v21.10 Released' -image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-10/featured.jpg' -date: '2021-10-14' -author: 'Rich Raposa, Alexey Milovidov' -tags: ['company', 'community'] ---- - -We're excited to share with you our first release since [announcing ClickHouse, Inc](https://clickhouse.com/blog/en/2021/clickhouse-inc/). The 21.10 release includes new contributions from multiple contributors including many in our community, and we are grateful for your ongoing ideas, development, and support. Our Engineering team continues to be laser-focused on providing our community and users with the fastest and most scalable OLAP DBMS available while implementing many new features. In the 21.10 release, we have a wonderful 79 contributors with 1255 commits across 211 pull requests - what an amazing community and we cherish your contributions. - -Let's highlight some of these new exciting new capabilities in 21.10: - -* User-defined functions (UDFs) can now be [created as lambda expressions](https://clickhouse.com/docs/en/sql-reference/functions/#higher-order-functions). For example, `CREATE FUNCTION plus_one as (a) -> a + 1` -* Two new table engines: Executable and ExecutablePool which allow you to stream the results of a query to a custom shell script -* Instead of logging every query (which can be a lot of logs!), you can now log a random sample of your queries. The number of queries logged is determined by defining a specified probability between 0.0 (no queries logged) and 1.0 (all queries logged) using the new `log_queries_probability` setting. -* Positional arguments are now available in your GROUP BY, ORDER BY and LIMIT BY clauses. 
For example, `SELECT foo, bar, baz FROM my_table ORDER BY 2,3` orders the results by the bar and baz columns (no need to specify column names twice!) - -We're also thrilled to announce some new free training available to you in our Learn ClickHouse portal: [https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/](https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/) - -We're always listening for new ideas, and we're happy to welcome new contributors to the ClickHouse project. Whether for submitting code or improving our documentation and examples, please get involved by sending us a pull request or submitting an issue. Our beginner developers contribution guide will help you get started: [https://clickhouse.com/docs/en/development/developer-instruction/](https://clickhouse.com/docs/en/development/developer-instruction/) - - -## ClickHouse Release Notes - -Release 21.10 - -Release Date: 2021-10-17 - -Release Notes: [21.10](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) diff --git a/website/blog/en/2021/clickhouse-v21.11-released.md b/website/blog/en/2021/clickhouse-v21.11-released.md deleted file mode 100644 index 0f478848410..00000000000 --- a/website/blog/en/2021/clickhouse-v21.11-released.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: 'ClickHouse v21.11 Released' -image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-11/featured-dog.jpg' -date: '2021-11-11' -author: 'Rich Raposa, Alexey Milovidov' -tags: ['company', 'community'] ---- - -We're continuing our monthly release cadence and blog updates at [ClickHouse, Inc](https://clickhouse.com/blog/en/2021/clickhouse-inc/). The 21.11 release includes asynchronous inserts, interactive mode, UDFs, predefined connections, and compression gains. Thank you to the 142 committers and 4337 commits for making this release possible. 
- -Let's highlight some of these new exciting new capabilities in 21.11: - -## Async Inserts - -New asynchronous INSERT mode allows to accumulate inserted data and store it in a single batch utilizing less disk resources(IOPS) enabling support of high rate of INSERT queries. On a client it can be enabled by setting `async_insert` for `INSERT` queries with data inlined in a query or in a separate buffer (e.g. for `INSERT` queries via HTTP protocol). If `wait_for_async_insert` is true (by default) the client will wait until data will be flushed to the table. On the server-side it can be tuned by the settings `async_insert_threads`, `async_insert_max_data_size` and `async_insert_busy_timeout_ms`. - -**How does this help our ClickHouse Users?** - -A notable pain point for users was around having to insert data in large batches and performance can sometimes be hindered. What if you have a monitoring use case and you want to do 1M records per second into ClickHouse; you would do large 100k record batches, but if you have 1,000 clients shipping data then that was hard to collect these batches to insert into ClickHouse. Historically to solve for this you might have to use Kafka or buffer tables to help with the balancing and insertion of data. - -Now, we've introduced this new mode of Async inserts where you can do a high rate of small inserts concurrently and ClickHouse will automatically group them together into batches and insert it into the table automatically. Every client will get an acknowledgement that the data was inserted successfully. - -## Local Interactive Mode - -We have added interactive mode for `clickhouse-local` so that you can just run `clickhouse-local` to get a command line ClickHouse interface without connecting to a server and process data from files and external data sources. - -**How does this help our ClickHouse Users?** - -What if you have an ad-hoc use case that you want to run analytics on a local file with ClickHouse? 
Historically, you'd have to spin up an empty ClickHouse server and connect it to the external data source that you were interested in running the query on e.g. S3, HDFS, URLs. Now with ClickHouse Local you can run it just like a ClickHouse Client and have the same full interactive experience without any additional overhead steps around setup and ingestion of data to try out your idea or hypothesis. Hope you enjoy! - -## Executable UDFs - -Added support for executable (scriptable) user defined functions. These are UDFs that can be written in any programming language. - -**How does this help our ClickHouse Users?** - -We added UDFs in our 21.10 release. Similar to our October release we're continuing to innovate around the idea of making it more user friendly to plug in tools into ClickHouse as functions. This could be you doing an ML inference in your Python script and now you can define it as a function available in SQL. Or, what if you wanted to do a DNS lookup? You have a domain name in a ClickHouse table and want to convert to an IP address with some function. Now just plug in an external script and this will go process and convert the domain names into IP addresses. - -## Predefined Connections - -Allow predefined connections to external data sources. This allows to avoid specifying credentials or addresses while using external data sources, they can be referenced by names instead. - -**How does this help our ClickHouse Users?** - -You're just trying to connect ClickHouse to another data source to load data, like MySQL for example, how do you do that? Before this feature you would have to handle all the credentials for MySQL, use the MySQL table functions, know the user and password permissions to access certain tables, etc. Now you can predefine the required parameters inside the ClickHouse configuration and the user can just refer to this by a name e.g. MongoDB, HDFS, S3, MySQL and it's a one-time configuration going forward. 
- -## Compression - -Add support for compression and decompression for `INTO OUTFILE` and `FROM INFILE` (with autodetect or with additional optional parameter). - -**How does this help our ClickHouse Users?** - -Are you just looking to import and export data into ClickHouse more easily if you have compressed data? Before this feature you had to manually specify compression of input and output data into ClickHouse and even for stream insertion you'd still have to manage the decompression there too. Now, you can just write it as a file e.g. mytable.csv.gz --- and, go! - -In the last month, we've added new free Training modules including a What's New in 21.11. Take the lesson [here](https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.11/). - -## ClickHouse Release Notes - -Release 21.11 - -Release Date: 2021-11-09 - -Release Notes: [21.11](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) diff --git a/website/blog/en/2021/clickhouse-v21.12-released.md b/website/blog/en/2021/clickhouse-v21.12-released.md deleted file mode 100644 index 7b4c7862700..00000000000 --- a/website/blog/en/2021/clickhouse-v21.12-released.md +++ /dev/null @@ -1,285 +0,0 @@ ---- -title: 'What''s New in ClickHouse 21.12' -image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-12/featured.jpg' -date: '2021-12-16' -author: 'Alexey Milovidov, Christoph Wurm' -tags: ['company', 'community'] ---- - -We're continuing our monthly release cadence. 
The 21.12 Christmas release includes 2460 new commits from 125 contributors, including 42 new contributors: - -> Alex Cao, Amr Alaa, Andrey Torsunov, Constantine Peresypkin, Dmitriy Dorofeev, Egor O'Sten, Elykov Alexandr, Evgeny, Frank Chen, LB, Natasha Murashkina, Peignon Melvyn, Rich Raposa, Roman Chyrva, Roman, SuperDJY, Thom O'Connor, Timur Magomedov, Tom Risse, Tomáš Hromada, cfcz48, cgp, cms, cmsxbc, congbaoyangrou, dongyifeng, frank chen, freedomDR, jus1096, khamadiev, laurieliyang, leosunli, liyang830, loneylee, michael1589, msaf1980, p0ny, qieqieplus, spume, sunlisheng, yandd, zhanghuajie. - -If you are wondering, this list is generated by the following command: - -``` -clickhouse-local --query " - SELECT arrayStringConcat(groupArray(s), ', ') - FROM file('contributors-21.12.txt', LineAsString, 's String') - WHERE s NOT IN ( - SELECT * - FROM file('contributors-21.11.txt', LineAsString, 's String')) - FORMAT TSVRaw" -``` - -And to list the contributors, you can always run the -``` -SELECT * FROM system.contributors -``` -query on your production server. - -Let's highlight some of the new capabilities in 21.12: - - -## ClickHouse Keeper is Feature Complete - -In 21.12 `clickhouse-keeper` started to support ["four letter commands"](https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands) for status and monitoring. This feature is contributed by **JackyWoo** and reviewed by **Alexander Sapin** (the author of ClickHouse Keeper). - -It was the only missing feature to implement. In this release, clickhouse-keeper is still considered in pre-production stage, but many companies already started to evaluate and use it as a replacement of ZooKeeper. You can also start using clickhouse-keeper in your testing environments and we will appreciate your feedback. - -ClickHouse Keeper development started in Sep 2020, more than a year ago. 
It was a long road, and most of the efforts were to ensure correctness and stability in unusual and exceptional scenarios. It is covered by [Jepsen](https://jepsen.io/) tests (including ZooKeeper tests and [newly introduced tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/jepsen.clickhouse-keeper)), continuous randomized stress testing with ClickHouse functional and integration tests. It has started to be tested in Yandex Cloud and among our best friends. If you're aspiring to be our best friend, you can also do it. - -**How does this help you?** - -ClickHouse Keeper is a drop-in replacement for ZooKeeper. It implements the ZooKeeper wire protocol and data model, but does it better. - -In contrast to ZooKeeper, there are no issues with zxid overflow or packet sizes. It has better memory usage and it does not require JVM tuning (because it does not use the JVM). Logs and snapshots are compressed (by about 10x typically) and checksummed. It can run as a separate process or directly inside clickhouse-server. You can use it with ClickHouse or with your Kafkas and Hadoops as well. - -[More info](http://presentations.clickhouse.tech/meetup54/keeper.pdf). - - -## Partitions For INSERT INTO File, URL And HDFS Storages - -When using the table engines `File`, `URL`, and `HDFS` ClickHouse now supports partitions. When creating a table you can specify the partition key using the `PARTITION BY` clause e.g. `CREATE TABLE hits_files (...) ENGINE = File(TabSeparated) PARTITION BY toYYYYMM(EventDate)`. - -Similarly, when exporting data from ClickHouse using the `file`, `url`, and `hdfs` table functions you can now specify that the data is to be partitioned into multiple files using a `PARTITION BY` clause. For example, `INSERT INTO TABLE FUNCTION file('path/hits_{_partition_id}', 'TSV', 'columns...') PARTITION BY toYYYYMM(EventDate) VALUES ...` will create as many files as there are unique months in the dataset.
- -The `s3` table function has already supported partitioned writes since ClickHouse 21.10. - -**How does this help you?** - -If data is split into multiple files, `SELECT` queries will be automatically parallelized. For example: - -``` -SELECT user_id, count() FROM s3( - 'https://s3.us-east-2.amazonaws.com/.../*.csv.zstd', - '...', '...', - CSV, - 'user_id UInt64, ...') -``` - -You can even parallelize data processing across a distributed compute cluster if you use the `s3Cluster` table function: - -``` -SELECT user_id, count() FROM s3Cluster( - my_cluster, - 'https://s3.us-east-2.amazonaws.com/.../*.csv.zstd', - '...', - '...', CSV, - 'user_id UInt64, ...') -``` - -It can also be used for integration with external data processing tools that consume data from `s3`. - - -## FROM INFILE in clickhouse-client now supports glob patterns and parallel reading - -Just write: - -``` -INSERT INTO my_table FROM INFILE '*.csv.gz' FORMAT CSV -``` - -Glob patterns support `*`, `?` and `{n..m}` with `{1..10}` or (aligned) `{01..10}` forms. -This query will be automatically parallelized and it will also automatically detect the compression format from the file extension and decompress transparently. - -This improvement is done by **Arthur Filatenkov**. - -**How does this help you?** - -Now you don't have to recall how to write a parallel for loop in your command line shell. clickhouse-client will do everything for you, it works intuitively and fast. - - -## Support for INTERVAL operator inside WITH FILL modifier for ORDER BY clause - -What's the... `WITH FILL` modifier in the `ORDER BY` clause? Take a look at the example: - -``` -:) SELECT EventDate, count() FROM test.hits WHERE CounterID = 2841673 GROUP BY EventDate ORDER BY EventDate - -┌──EventDate─┬─count()─┐ -│ 2014-03-17 │ 3 │ -│ 2014-03-19 │ 6 │ -│ 2014-03-21 │ 7 │ -│ 2014-03-22 │ 6 │ -└────────────┴─────────┘ -``` - -We have the report with Mar 17th, 19th, 21st, and 22nd. 
But Mar 18th and 20th are missing, because there is no data for these dates. -And this is how it works in all SQL databases. - -But ClickHouse also has a quite unique and neat `WITH FILL` modifier for the `ORDER BY` clause. - -You just write: -``` -SELECT EventDate, count() FROM test.hits WHERE CounterID = 2841673 GROUP BY EventDate -ORDER BY EventDate WITH FILL STEP 1 - -┌──EventDate─┬─count()─┐ -│ 2014-03-17 │ 3 │ -│ 2014-03-18 │ 0 │ -│ 2014-03-19 │ 6 │ -│ 2014-03-20 │ 0 │ -│ 2014-03-21 │ 7 │ -│ 2014-03-22 │ 6 │ -└────────────┴─────────┘ -``` - -And missing data is automatically filled. - -You can also add `FROM` and `TO`: - -``` -ORDER BY EventDate WITH FILL FROM '2014-03-01'::Date TO '2014-03-31'::Date STEP 1; -``` - -And it will automatically fill missing rows in the report. - -The `STEP` can be an arbitrary number. But what can you do if you want to fill missing dates for a report by months? You cannot just write `STEP 30` or `STEP 31` because different months contain different number of days... - -Since ClickHouse version 21.12 you can do it like this: - -``` -ORDER BY EventDate WITH FILL STEP INTERVAL 1 MONTH -``` - -`INTERVAL` is a standard SQL operator, you can use SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER and YEAR. - -This is implemented by **Anton Popov** who is the author of the "WITH FILL" feature. - -**How does this help you?** - -It allows you to avoid a postprocessing step for your reports. - - -## Add Support For "Identifier" Table and Database Query Parameters - -ClickHouse has support for parameterized queries. For example: - -``` -SELECT uniq(user_id) FROM table WHERE website = {name:String} -``` - -It allows to safely substitute parameters without the risk of SQL injections: - -``` -curl https://clickhouse-server:8443/?param_name=upyachka -d 'SELECT uniq(user_id) FROM table WHERE website = {name:String}' -``` - -You can even create customized API handlers for clickhouse-server based on prepared queries. 
- -In version 21.12 we introduce support for using parameters for tables and databases in your queries. This is implemented with the `Identifier` table parameter: - -``` -SELECT uniq(user_id) FROM {tbl:Identifier} -``` - -Identifier parameters also work for CREATE, DROP and all DDL queries. This is implemented by **Nikolai Degterinskiy**. - -**How does this help you?** - -Let ClickHouse do the heavy lifting and keep your scripts safe and secure. - - -## Bool Data Type - -This feature is experimental in version 21.12. It is implemented by **Kevin Wan (MaxWk)** on top of initial work by **hczhcz** and reviewed by **Pavel Kruglov**. - -ClickHouse now natively supports a `Bool` data type. It allows to represent values as "true"/"false" during data import and export in text formats. It can also be adjusted to anything else using the settings `bool_true_representation` and `bool_false_representation` (for example, "yes" and "no"). - -**How does this help you?** - -Native boolean data types exist today in other databases that are often integrated with ClickHouse, such as PostgreSQL. The `Bool` data type in ClickHouse will make it more compatible with existing code and ease migration from other databases. - -Also it simplifies data ingestion from various text sources. - - -## Query Optimizations With Table Constraints - -This feature is [contributed](https://github.com/ClickHouse/ClickHouse/pull/18787) by **Nikita Vasilev**. Nikita is one of the most notable ClickHouse contributors. He started in 2019 by introducing data skipping indices into ClickHouse, then continued in 2020 with SSD-optimized key-value dictionaries and now contributed the new advancements in the query optimizer. This feature is reviewed by **Anton Popov**. - -So, what optimizations? 
ClickHouse already allows specifying constraints for tables: - -``` -CREATE TABLE hits -( - URL String, - Domain String, - CONSTRAINT validate CHECK isValidUTF8(URL) AND length(URL) BETWEEN 10 AND 10000, - CONSTRAINT my_constraint CHECK Domain = domainWithoutWWW(URL) -) ... -``` - -Constraints are checked on `INSERT`. In this example we validate the URL and check that the `Domain` column actually contains the domain of the URL. - -Since version 21.12 constraints can also automatically optimize your queries! For example, if you write: - -``` -SELECT count() FROM hits WHERE domainWithoutWWW(URL) = 'ghe.clickhouse.tech' -``` - -The query can be automatically rewritten to: - -``` -SELECT count() FROM hits WHERE Domain = 'ghe.clickhouse.tech' -``` - -Because the `Domain` column is smaller and more compressible it will be faster to read and does not require calculation of the domain from the URL. -The only thing you need to do is to enable the `optimize_using_constraints` and `optimize_substitute_columns` settings. - -As a bonus, we introduced a new type of constraint: `ASSUME`. - -``` -CONSTRAINT my_constraint ASSUME Domain = domainWithoutWWW(URL) -``` - -This type of constraint will not check anything on `INSERT` but will still use the assumption to optimize the queries. - -It can also do logical inference, simplify the conditions and remove the conditions that are proved to be satisfied by constraints. -It is controlled by the `convert_query_to_cnf` setting. You can also enable `optimize_append_index`. With this setting ClickHouse will derive more conditions on the table primary key. - -The idea is so powerful that we cannot resist adding one more feature: *indices for hypothesis*. - -``` -INDEX my_index (a < b) TYPE hypothesis GRANULARITY 1 -``` - -The expression is checked and the result (true/false) is written as an index for query optimization.
- -**How does this help you?** - -Especially in large ClickHouse deployments with many complex tables it can be hard for users to always be up to date on the best way to query a given dataset. Constraints can help optimize queries without having to change the query structure itself. They can also make it easier to make changes to tables. - -For example, let's say you have a table containing web requests and it includes a URL column that contains the full URL of each request. Many times, users will want to know the top level domain (.com, .co.uk, etc.), something ClickHouse provides the `topLevelDomain` function to calculate. If you discover that many people are using this function you might decide to create a new materialized column that pre-calculates the top level domain for each record. - -Rather than tell all your users to change their queries you can use a table constraint to tell ClickHouse that each time a user tries to call the `topLevelDomain` function the request should be rewritten to use the new materialized column. - - -## Read Large Remote Files In Chunks - -ClickHouse combines a fast query engine and efficient data storage. It also allows to integrate external data sources for data import and export or even to process external datasets on the fly without the need for data import or preprocessing. - -When reading large files in `Parquet`, `ORC`, and `Arrow` format using the `s3`, `url`, and `hdfs` table functions, ClickHouse will now automatically choose whether to read the entire file at once or read parts of it incrementally. This is now enabled by default and the setting `remote_read_min_bytes_for_seek` controls when to switch from reading it all to reading in chunks. The default is 1MiB. 
- -`Parquet`, `ORC`, and `Arrow` are column-oriented formats (quite similar to the ClickHouse Native format) and now we can read only requested columns even if they are being read from a remote HTTP server with the `url` table function (range requests will be performed to skip unneeded data). - -This feature is implemented by **Kseniia Sumarokova**. - -**How does this help our ClickHouse Users?** - -In previous versions, when reading files in Arrow-based formats from remote locations with the `s3`, `url`, and `hdfs` table functions, ClickHouse would always read the entire file into memory. This works well when the files are small but will cause excessive memory usage or not work at all when the files are large. With this change, ClickHouse will read large files in chunks to keep memory usage in check and is now able to read even very large files. - - -## ... And Many More - -Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 21.12 "Christmas" release for the full list of gifts from the [ClickHouse Team](https://clickhouse.com/company/). diff --git a/website/blog/en/2021/code-review.md b/website/blog/en/2021/code-review.md deleted file mode 100644 index d726dc7aa4f..00000000000 --- a/website/blog/en/2021/code-review.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: 'The Tests Are Passing, Why Would I Read The Diff Again?' -image: 'https://blog-images.clickhouse.com/en/2021/code-review/two-ducks.jpg' -date: '2021-04-14' -author: 'Alexander Kuzmenkov' -tags: ['code review', 'development'] ---- - - -Code review is one of the few software development techniques that are consistently found to reduce the incidence of defects. Why is it effective? This article offers some wild conjecture on this topic, complete with practical advice on getting the most out of your code review. - - -## Understanding Why Your Program Works - -As software developers, we routinely have to reason about the behaviour of software. 
For example, to fix a bug, we start with a test case that exhibits the behavior in question, and then read the source code to see how this behavior arises. Often we find ourselves unable to understand anything, having to resort to forensic techniques such as using a debugger or interrogating the author of the code. This situation is far from ideal. After all, if we have trouble understanding our software, how can we be sure it works at all? No surprise that it doesn't. - -The correct understanding is also important when modifying and extending software. A programmer must always have a precise mental model on what is going on in the program, how exactly it maps to the domain, and so on. If there are flaws in this model, the code they write won't match the domain and won't solve the problem correctly. Wrong understanding directly causes bugs. - -How can we make our software easier to understand? It is often said that to see if you really understand something, you have to try explaining it to somebody. For example, as a science student taking an exam, you might be expected to give an explanation to some well-known observed effect, deriving it from the basic laws of this domain. In a similar way, if we are modeling some problem in software, we can start from domain knowledge and general programming knowledge, and build an argument as to why our model is applicable to the problem, why it is correct, has optimal performance and so on. This explanation takes the form of code comments, or, at a higher level, design documents. - -If you have a habit of thoroughly commenting your code, you might have noticed that writing the comments is often much harder than writing the code itself. It also has an unpleasant side effect — at times, while writing a comment, it becomes increasingly clear to you that the code is incomprehensible and takes forever to explain, or maybe is downright wrong, and you have to rewrite it. 
This is exactly the major positive effect of writing the comments. It helps you find bugs and make the code more understandable, and you wouldn't have noticed these problems unless you tried to explain the code. - -Understanding why your program works is inseparable from understanding why it fails, so it's no surprise that there is a similar process for the latter, called "rubber duck debugging". To debug a particularly nasty bug, you start explaining the program logic step by step to an imaginary partner or even to an inanimate object such as a yellow rubber duck. This process is often very effective, much in excess of what one would expect given the limited conversational abilities of rubber ducks. The underlying mechanism is probably the same as with comments — you start to understand your program better by just trying to explain it, and this lets you find bugs. - -When working in a team, you even have the luxury of explaining your code to another developer who works on the same project. It's probably more entertaining than talking to a duck. More importantly, they are going to maintain the code you wrote, so better make sure that _they_ can understand it as well. A good formal occasion for explaining how your code works is the code review process. Let's see how you can get the most out of it, in terms of making your code understandable. - -## Reviewing Others' Code - -Code review is often framed as a gatekeeping process, where each contribution is vetted by maintainers to ensure that it is in line with project direction, has acceptable quality, meets the coding guidelines and so on. This perspective might seem natural when dealing with external contributions, but makes less sense if you apply it to internal ones. After all, our fellow maintainers have perfect understanding of project goals and guidelines, probably they are more talented and experienced than us, and can be trusted to produce the best solution possible. How can an additional review be helpful?
- -A less-obvious, but very important, part of reviewing the code is just seeing whether it can be understood by another person. It is helpful regardless of the administrative roles and programming proficiency of the parties. What should you do as a reviewer if ease of understanding is your main priority? - -You probably don't need to be concerned with trivia such as code style. There are automated tools for that. You might find some bugs, but this is probably a side effect. Your main task is making sense of the code. - -Start with checking the high-level description of the problem that the pull request is trying to solve. Read the description of the bug it fixes, or the docs for the feature it adds. For bigger features, there is normally a design document that describes the overall implementation without getting too deep into the code details. After you understand the problem, start reading the code. Does it make sense to you? You shouldn't try too hard to understand it. Imagine that you are tired and under time pressure. If you feel you have to make a lot of effort to understand the code, ask the author for clarifications. As you talk, you might discover that the code is not correct, or it may be rewritten in a more straightforward way, or it needs more comments. - - - -After you get the answers, don't forget to update the code and the comments to reflect them. Don't just stop after getting it explained to you personally. If you had a question as a reviewer, chances are that other people will also have this question later, but there might be nobody around to ask. They will have to resort to `git blame` and re-reading the entire pull request or several of them. Code archaeology is sometimes fun, but it's the last thing you want to do when you are investigating an urgent bug. All the answers should be on the surface. 
- -Working with the author, you should ensure that the code is mostly obvious to anyone with basic domain and programming knowledge, and all non-obvious parts are clearly explained. - -### Preparing Your Code For Review - -As an author, you can also do some things to make your code easier to understand for the reviewer. - -First of all, if you are implementing a major feature, it probably needs a round of design review before you even start writing code. Skipping a design review and jumping right into the code review can be a major source of frustration, because it might turn out that even the problem you are solving was formulated incorrectly, and all your work has to be thrown away. Of course, this is not prevented completely by design review, either. Programming is an iterative, exploratory activity, and in complex cases you only begin to grasp the problem after implementing a first solution, which you then realize is incorrect and has to be thrown away. - -When preparing your code for review, your major objective is to make your problem and its solution clear to the reviewer. A good tool for this is code comments. Any sizable piece of logic should have an introductory comment describing its general purpose and outlining the implementation. This description can reference similar features, explain the difference to them, explain how it interfaces with other subsystems. A good place to put this general description is a function that serves as a main entry point for the feature, or other form of its public interface, or the most significant class, or the file containing the implementation, and so on. - -Drilling down to each block of code, you should be able to explain what it does, why it does that, why this way and not another. If there are several ways of doing the thing, why did you choose this one? Of course, for some code these things follow from the more general comments and don't have to be restated. 
The mechanics of data manipulation should be apparent from the code itself. If you find yourself explaining a particular feature of the language, it's probably best not to use it. - -Pay special attention to making the data structures apparent in the code, and their meaning and invariants well commented. The choice of data structures ultimately determines which algorithms you can apply, and sets the limits of performance, which is another reason why we should care about it as ClickHouse developers. - -When explaining the code, it is important to give your reader enough context, so that they can understand you without a deep investigation of the surrounding systems and obscure test cases. Give pointers to all the things that might be relevant to the task. If you know some corner cases which your code has to handle, describe them in enough detail so that they can be reproduced. If there is a relevant standard or a design document, reference it, or even quote it inline. If you're relying on some invariant in another system, mention it. It is good practice to add programmatic checks that mirror your comments, when it is easy to do so. Your comment about an invariant should be accompanied by an assertion, and an important scenario should be reproduced by a test case. - -Don't worry about being too verbose. There are often not enough comments, but almost never too many of them. - -## Common Concerns about Code Comments - -It is common to hear objections to the idea of commenting the code, so let's discuss a couple of usual ones. - -### Self-documenting Code - -You can often see a perplexing idea that the source code can somehow be "self-documenting", or that the comments are a "code smell", and their presence indicates that the code is badly written. I have trouble imagining how this belief can be compatible with any experience in maintaining sufficiently complex and large software, over the years, in collaboration with others.
The code and the comments describe different parts of the solution. The code describes the data structures and their transformations, but it cannot convey meaning. The names in the code serve as pointers that map the data and its transformations to the domain concepts, but they are schematic and lack nuance. It is not so difficult to write code that makes it easy to understand what's going on in terms of data manipulation. What it takes is mostly moderation, that is, stopping yourself from being too clever. For most code, it is easy to see what it does, but why? Why this way and not that way? Why is it correct? Why this fast path here helps? Why did you choose this data layout? How is this invariant guaranteed? And so on. This might be not so evident for a developer who is working alone on a short-lived project, because they have all the necessary context in their head. But when they have to work with other people (or even with themselves from past and future), or in an unfamiliar area, the importance of non-code, higher-level context becomes painfully clear. The idea that we should, or even can, somehow encode comments such as [this one](https://github.com/ClickHouse/ClickHouse/blob/26d5db32ae5c9f54b8825e2eca1f077a3b17c84a/src/Storages/MergeTree/KeyCondition.cpp#L1312-L1347) into names or control flow is just absurd. - -### Obsolete Comments - -The comments can't be checked by the compiler or the tests, so there is no automated way to make sure that they are up to date with the rest of the comments and the code. The possibility of comments gradually getting incorrect is sometimes used as an argument against having any comments at all. - -This problem is not exclusive to the comments — the code also can and does become obsolete. Simple cases such as dead code can be detected by static analysis or studying the test coverage of code. 
More complex cases can only be found by proofreading, such as maintaining an invariant that is not important anymore, or preparing some data that is not needed. - -While an obsolete comment can lead to a mistake, the same applies, perhaps more strongly, to the lack of comments. When you need some higher-level knowledge about the code, but it is not written down, you are forced to perform an entire investigation from first principles to understand what's going on, and this is error-prone. Even an obsolete comment likely gives a better starting point than nothing. Moreover, in a code base that makes active use of the comments, they tend to be mostly correct. This is because the developers rely on comments, read and write them, pay attention to them during code review. The comments are routinely changed along with changing the code, and the outdated comments are soon noticed and fixed. This does require some habit. A lone comment in a vast desert of impenetrable self-documenting code is not going to fare well. - - -## Conclusion - -Code review makes your software better, and a significant part of this probably comes from trying to understand what your software actually does. By paying attention specifically to this aspect of code review, you can make it even more efficient. You'll have fewer bugs, and your code will be easier to maintain — and what else could we ask for as software developers? - - -_2021-04-13 [Alexander Kuzmenkov](https://github.com/akuzm). Title photo by [Nikita Mikhaylov](https://github.com/nikitamikhaylov)_ - -_P.S.
This text contains the personal opinions of the author, and is not an authoritative manual for ClickHouse maintainers._ diff --git a/website/blog/en/2021/fuzzing-clickhouse.md b/website/blog/en/2021/fuzzing-clickhouse.md deleted file mode 100644 index 3fa518aecce..00000000000 --- a/website/blog/en/2021/fuzzing-clickhouse.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: 'Fuzzing ClickHouse' -image: 'https://blog-images.clickhouse.com/en/2021/fuzzing-clickhouse/some-checks-were-not-successful.png' -date: '2021-03-11' -author: 'Alexander Kuzmenkov' -tags: ['fuzzing', 'testing'] ---- - -Testing is a major problem in software development: there is never enough of it. It becomes especially true for a database management system, whose task is to interpret a query language that works on the persistent state managed by the system in a distributed fashion. Each of these three functions is hard enough to test even in isolation, and it gets much worse when you combine them. As ClickHouse developers, we know this from experience. Despite a large amount of automated testing of all kinds we routinely perform as part of our continuous integration system, new bugs and regressions are creeping in. We are always looking for the ways to improve our test coverage, and this article will describe our recent development in this area — the AST-based query fuzzer. - -## How to Test a SQL DBMS - -A natural form of testing for a SQL DBMS is to create a SQL script describing the test case, and record its reference result. To test, we run the script and check that the result matches the reference. This is used in many SQL DBMS, and it is the default kind of a test you are expected to write for any ClickHouse feature or fix. 
Currently we have [73k lines of SQL tests alone](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/0_stateless), that reach the [code coverage of 76%](https://clickhouse-test-reports.s3.yandex.net/0/47d684a5c35410201d4dd4f63f3287bf25cdabb7/coverage_report/test_output/index.html). - -This form of testing, where a developer writes a few simplified examples of how the feature can and cannot be used, is sometimes called "example-based testing". Sadly, the bugs often appear in various corner cases and intersections of features, and it is not practical to enumerate all of these cases by hand. There is a technique for automating this process, called "property-based testing". It lets you write more general tests of the form "for all values matching these specs, the result of some operation on them should match this other spec". For example, such a test can check that if you add two positive numbers, the result is greater than both of them. But you don't specify which numbers exactly, only these properties. Then, the property testing system randomly generates some examples with particular numbers that match the specification, and checks that the result also matches its specification. - -Property-based testing is said to be very efficient, but requires some developer effort and expertise to write the tests in a special way. There is another well-known testing technique that is in some sense a corner case of property-based testing, and that doesn't require much developer time. It is called fuzzing. When you are fuzzing your program, you feed it random inputs generated according to some grammar, and the property you are checking is that your program terminates correctly (no segfaults or assertions or other kinds of program errors). Most often, the grammar of input for fuzzing is simple — say, bit flips and additions, or maybe some dictionary. 
The space of possible inputs is huge, so to find interesting paths in it, fuzzing software records the code paths taken by the program under test for a particular input, and focuses on the inputs that lead to new code paths that were not seen before. It also employs some techniques for finding interesting constant values, and so on. In general, fuzzing allows you to find many interesting corner cases in your program automatically, without much developer involvement. - -Generating valid SQL queries with bit flips would take a long time, so there are systems that generate queries based on the SQL grammar, such as [SQLSmith](https://github.com/anse1/sqlsmith). They are successfully used for finding bugs in databases. It would be interesting to use such a system for ClickHouse, but it requires some up-front effort to support the ClickHouse SQL grammar and functions, which may be different from the standard. Also, such systems don't use any feedback, so while they are much better than systems with primitive grammar, they still might have a hard time finding interesting examples. But we already have a big corpus of human-written interesting SQL queries — it's in our regression tests. Maybe we can use them as a base for fuzzing? We tried to do this, and it turned out to be surprisingly simple and efficient. - -## AST-based Query Fuzzer - -Consider some SQL query from a regression test. After parsing, it is easy to mutate the resulting AST (abstract syntax tree, an internal representation of the parsed query) before execution to introduce random changes into the query. For strings and arrays, we make random modifications such as inserting a random character or doubling the string. For numbers, there are well-known Bad Numbers such as 0, 1, powers of two and nearby, integer limits, `NaN`.
`NaN`s proved to be especially efficient in finding bugs, because you can often have some alternative branches in your numeric code, but for a `NaN`, both branches hold (or not) simultaneously, so this leads to nasty effects. - -Another interesting thing we can do is change the arguments of functions, or the list of expressions in `SELECT`, `ORDER BY` and so on. Naturally, all the interesting arguments can be taken from other test queries. Same goes for changing the tables used in the queries. When the fuzzer runs in CI, it runs queries from all the SQL tests in random order, mixing into them some parts of queries it has seen previously. This process can eventually cover all the possible permutations of our features. - -The core implementation of the fuzzer is relatively small, consisting of about 700 lines of C++ code. A prototype was made in a couple of days, but naturally it took significantly longer to polish it and to start routinely using it in CI. It is very productive and let us find more than 200 bugs already (see the label [fuzz](https://github.com/ClickHouse/ClickHouse/labels/fuzz) on GitHub), some of which are serious logic errors or even memory errors. When we only started, we could segfault the server or make it enter a never-ending loop with simplest read-only queries such as `SELECT arrayReverseFill(x -> (x < 10), [])` or `SELECT geoDistance(0., 0., -inf, 1.)`. Of course I couldn't resist bringing down our [public playground](https://gh-api.clickhouse.com/play?user=play#LS0gWW91IGNhbiBxdWVyeSB0aGUgR2l0SHViIGhpc3RvcnkgZGF0YSBoZXJlLiBTZWUgaHR0cHM6Ly9naC5jbGlja2hvdXNlLnRlY2gvZXhwbG9yZXIvIGZvciB0aGUgZGVzY3JpcHRpb24gYW5kIGV4YW1wbGUgcXVlcmllcy4Kc2VsZWN0ICdoZWxsbyB3b3JsZCc=) with some of these queries, and was content to see that the server soon restarts correctly. 
These queries are actually minified by hand, normally the fuzzer would generate something barely intelligible such as: -``` -SELECT - (val + 257, - (((tuple(NULL), 10.000100135803223), tuple(-inf)), '-1', (NULL, '0.10', NULL), NULL), - (val + 9223372036854775807) = (rval * 100), - tuple(65535), tuple(NULL), NULL, NULL), - * -FROM -( - SELECT dummy AS val - FROM system.one -) AS s1 -ANY LEFT JOIN -( - SELECT toLowCardinality(toNullable(dummy)) AS rval - FROM system.one -) AS s2 ON (val + 100) = (rval * 7) -``` -In principle, we could add automated test case minification by modifying AST in the same vein with fuzzing. This is somewhat complicated by the fact that the server dies after every, excuse my pun, successfully failed query, so we didn't implement it yet. - -Not all errors the fuzzer finds are significant, some of them are pretty boring and harmless, such as a wrong error code for an out-of-bounds argument. We still try to fix all of them, because this lets us ensure that under normal operation, the fuzzer doesn't find any errors. This is similar to the approach usually taken with compiler warnings and other optional diagnostics — it's better to fix or disable every single case, so that you can be sure you have no diagnostics if everything is OK, and it's easy to notice new problems. - -After fixing the majority of pre-existing error, this fuzzer became efficient for finding errors in new features. Pull requests introducing new features normally add an SQL test, and we pay extra attention to the new tests when fuzzing, generating more permutations for them. Even if the coverage of the test is not sufficient, there is a good chance that the fuzzer will find the missing corner cases. So when we see that all the fuzzer runs in different configurations have failed for a particular pull request, this almost always means that it introduces a new bug. When developing a feature that requires new grammar, it is also helpful to add fuzzing support for it. 
I did this for window functions early in the development, and it helped me find several bugs. - -A major factor that makes fuzzing really efficient for us is that we have a lot of assertions and other checks of program logic in our code. For debug-only checks, we use the plain `assert` macro from `<cassert>`. For checks that are needed even in release mode, we use an exception with a special code `LOGICAL_ERROR` that signifies an internal program error. We did some work to ensure that these errors are distinct from errors caused by the wrong user actions. A user error reported for a randomly generated query is normal (e.g. it references some non-existent columns), but when we see an internal program error, we know that it's definitely a bug, same as an assertion. Of course, even without assertions, you get some checks for memory errors provided by the OS (segfaults). Another way to add runtime checks to your program is to use some kind of sanitizer. We already run most of our tests under clang's Address, Memory, UndefinedBehavior and Thread sanitizers. Using them in conjunction with this fuzzer also proved to be very efficient. - -To see for yourself how the fuzzer works, you only need the normal ClickHouse client. Start `clickhouse-client --query-fuzzer-runs=100`, enter any query, and enjoy the client going crazy and running a hundred random queries instead. All queries from the current session become a source for expressions for fuzzing, so try entering several different queries to get more interesting results. Be careful not to do this in production! When you do this experiment, you'll soon notice that the fuzzer tends to generate queries that take very long to run. This is why for the CI fuzzer runs we have to configure the server to limit query execution time, memory usage and so on using the corresponding [server settings](/docs/en/operations/settings/query-complexity/#:~:text=In%20the%20default%20configuration%20file,query%20within%20a%20single%20server.).
We had a hilarious situation after that: the fuzzer figured out how to remove the limits by generating a `SET max_execution_time = 0` query, and then generated a never-ending query and failed. Thankfully we were able to defeat its cleverness by using [settings constraints](/docs/en/operations/settings/constraints-on-settings/). - -## Other Fuzzers - -The AST-based fuzzer we discussed is only one of the many kinds of fuzzers we have in ClickHouse. There is a [talk](https://www.youtube.com/watch?v=GbmK84ZwSeI&t=4481s) (in Russian, [slides are here](https://presentations.clickhouse.com/cpp_siberia_2021/)) by Alexey Milovidov that explores all the fuzzers we have. Another interesting recent development is application of pivoted query synthesis technique, implemented in [SQLancer](https://github.com/sqlancer/sqlancer), to ClickHouse. The authors are going to give [a talk about this](https://heisenbug-piter.ru/2021/spb/talks/nr1cwknssdodjkqgzsbvh/) soon, so stay tuned. diff --git a/website/blog/en/2021/how-to-enable-predictive-capabilities-in-clickhouse-databases.md b/website/blog/en/2021/how-to-enable-predictive-capabilities-in-clickhouse-databases.md deleted file mode 100644 index a73f6dcf91d..00000000000 --- a/website/blog/en/2021/how-to-enable-predictive-capabilities-in-clickhouse-databases.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: 'How to Enable Predictive Capabilities in Clickhouse Databases' -image: 'https://blog-images.clickhouse.com/en/2021/mindsdb-enables-predictive-capabilities-in-clickHouse/featured.png' -date: '2021-12-14' -author: 'Ilya Yatsishin' -tags: ['company', 'how-to', 'MindsDB'] ---- - -ClickHouse is a fast, open-source, column-oriented SQL database that is very useful for data analysis and real-time analytics and with MindsDB can be turned into a powerful machine learning platform for business forecasting. 
- -In this article, we will -- Guide you through the machine learning workflow and how to use ClickHouse’s powerful tools, like materialized views, to better and more effectively handle data cleaning and preparation - especially for the large datasets with billions of rows of data, -- Explore the concept of AI Tables from MindsDB and how they can be used within ClickHouse to automatically build predictive models and make forecasts using simple SQL statements, and -- Share how MindsDB automates really complex machine learning tasks, like multivariate time-series analysis with high cardinality, show how to detect anomalies, and visualize these predictions. - -[Read Further](https://mindsdb.com/blog/enabling-predictive-capabilities-in-clickhouse-database/?utm_medium=referral&utm_source=clickhouse&utm_campaign=clickhouse-ml-article-2021-12) diff --git a/website/blog/en/2021/performance-test-1.md b/website/blog/en/2021/performance-test-1.md deleted file mode 100644 index 1564b1c8a76..00000000000 --- a/website/blog/en/2021/performance-test-1.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: 'Testing the Performance of ClickHouse' -image: 'https://blog-images.clickhouse.com/en/2021/performance-testing-1/chebu-crop.jpg' -date: '2021-08-19' -author: 'Alexander Kuzmenkov' -tags: ['testing', 'performance'] ---- - -One of the main selling points of ClickHouse is that it's very fast, in many cases utilizing the hardware up to the theoretical limits. This was noted by many independent benchmarks such as [this one](http://brandonharris.io/redshift-clickhouse-time-series/). This speed boils down to a right combination of architectural choices and algorithmic optimizations, sprinkled with a dash of pixie dust. There is an [overview of these factors](/docs/en/faq/general/why-clickhouse-is-so-fast) on our website, or a talk by the ClickHouse lead developer Alexey Milovidov ["The secrets of ClickHouse performance optimizations"](https://www.youtube.com/watch?v=ZOZQCQEtrz8).
But this is a static picture of "how the things are". Software is a living and changing organism, and ClickHouse is changing very fast — to give you a scale, in July 2021 we merged 319 pull requests made by 60 different authors ([live statistics here](https://gh-api.clickhouse.com/play?user=play#c2VsZWN0IGRhdGVfdHJ1bmMoJ21vbnRoJywgY3JlYXRlZF9hdCkgbW9udGgsIHVuaXEoY3JlYXRvcl91c2VyX2xvZ2luKSBhdXRob3JzLCB1bmlxKG51bWJlcikgcHJzIGZyb20gZ2l0aHViX2V2ZW50cyB3aGVyZSByZXBvX25hbWUgPSAnQ2xpY2tIb3VzZS9DbGlja0hvdXNlJyBhbmQgbm90IGhhc0FueShsYWJlbHMsIFsncHItYmFja3BvcnQnLCAncHItZG9jdW1lbnRhdGlvbicsICdwci1jaGVycnlwaWNrJ10pIGFuZCBtZXJnZWQgYW5kIGNyZWF0ZWRfYXQgYmV0d2VlbiAnMjAyMC0wOS0wMScgYW5kICcyMDIxLTA5LTAxJyBncm91cCBieSBtb250aA==)). Any quality that is not actively selected for is going to be lost in this endless stream of changes, and the performance is no exception. For this reason, we have to have some process that allows us to ensure than ClickHouse always stays fast. - -# Measuring and Comparing the Performance - -How do we know it is fast, in the first place? We do a lot of benchmarks, many kinds of them. The most basic kind of a benchmark is a micro-benchmark, that doesn't use the full code of the server and tests a particular algorithm in isolation. We use them to choose a better inner loop for some aggregate function, or to test various layouts of hash tables, and so on. For example, when we discovered that a competing database engine completes a query with `sum` aggregate function twice as fast, we tested a couple of dozen implementations of `sum` to ultimately find the one that gives the best performance (see [a talk](https://www.youtube.com/watch?v=MJJfWoWJq0o) about this, in Russian). But testing a particular algorithm by itself is not enough to say how the entire query is going to work. We have to also make end-to-end measurements of entire queries, often using the real production data, because the particulars of the data (e.g. 
the cardinality and the distribution of values) heavily influence the performance. Currently we have about 3000 end-to-end test queries organized into about 200 [tests](https://github.com/ClickHouse/ClickHouse/tree/6c4c3df96e41425185beb0c471a8dde0ce6f25a7/tests/performance). Many of them use real data sets, such as the [production data of Yandex.Metrica](/docs/en/getting-started/example-datasets/metrica/), obfuscated with `clickhouse-obfuscator` as described [here](https://habr.com/ru/company/yandex/blog/485096/). - -Micro-benchmarks are normally run by a developer while working on the code, but it is not practical to manually run the entire battery of the end-to-end tests for each change. We use an automated system that does this for each pull request as part of continuous integration checks. It measures whether the code changes introduced by a pull request influenced the performance, for which kinds of queries and by how much, and alerts the developer if there is a regression. Here is how a typical report looks. - - - -To talk about "changes in performance", we first have to measure this performance. The most natural measure for a single query is elapsed time. It is susceptible to random variations, so we have to take several measurements and average them in some way. From the application point of view, the most interesting statistic is maximum. We want to guarantee that e.g. an analytical dashboard built on ClickHouse is responsive. However, the query time can grow almost without limit due to random factors such as sudden disk load spikes or network delays, so using the maximum is not practical. The minimum is also interesting — after all, there is a theoretical bound on it. We know that the particular algorithm can run only so fast on the particular hardware, in ideal conditions. But if we only look at the minimum, we are going to miss cases where some runs of the query are slow and some are not (e.g. boundary effects in some cache).
So we compromise by measuring the median. It is a robust statistic that is reasonably sensitive to outliers and stable enough against noise. - -After measuring the performance, how do we determine that it has changed? Due to various random and systematic factors, the query time always drifts, so the number always changes, but the question is whether this change is meaningful. If we have an old version of the server, and a new version of the server, are they going to consistently give a different result for this query, or was it just a fluke? To answer this, we have to employ some statistical method. The core idea of these methods is comparing the observed values to some reference distribution, and deciding whether what we observed can plausibly belong to this distribution, or, on the contrary, it cannot, which means that the performance characteristics of the patched server are indeed different. - -Choosing the reference distribution is the starting point. One way to obtain it is to build a mathematical model of the process. This works well for simple things like tossing a coin a fixed number of times. We can analytically deduce that the number of heads we get follows the binomial distribution, and get a confidence interval on this number, given the required [level of significance](https://en.wikipedia.org/wiki/P-value#Definition_and_interpretation). If the observed number of heads doesn't belong to this interval, we can conclude that the coin is biased. However, modeling the query execution from first principles is too complex. The best we can do is to use the hardware capabilities to estimate how fast the query could run, in principle, and try to achieve this throughput. - -For complex processes which resist modeling, a practical option is to use the historical data from the same process. We actually used to do this for ClickHouse. For each tested commit, we measured the run times for each test query and saved them into a database. 
We could compare the patched server to these reference values, build graphs of changes over time and so on. The main problem with this approach is systematic errors induced by environment. Sometimes the performance testing task ends up on a machine with dying HDD, or they update `atop` to a broken version that slows every kernel call in half, et cetera, ad infinitum. This is why now we employ another approach. - -We run the reference version of the server process and the tested version, simultaneously on the same machine, and run the test queries on each of them in turn, one by one. This way we eliminate most systematic errors, because both servers are equally influenced by them. We can then compare the set of results we got from the reference server process, and the set from the test server process, to see whether they look the same. Comparing the distributions using two samples is a very interesting problem in itself. We use a non-parametric bootstrap method to build a randomization distribution for the observed difference of median query run times. This method is described in detail in [[1]](#ref1), where they apply it to see how changing a fertilizer mixture changes the yield of tomato plants. ClickHouse is not much different from tomatoes, only we have to check how the changes in code influence the performance. - -This method ultimately gives a single threshold number _T_: what is the largest difference in median query run times between old and new server, that we can observe even if nothing has changed. Then we have a simple decision protocol given this threshold _T_ and the measured difference of medians _D_: - -1. _abs(D) <= T_ — the changes are not statistically significant, -2. _abs(D) <= 5%_ — the changes are too small to be important, -3. _abs(T) >= 10%_ — the test query has excessive run time variance that leads to poor sensitivity, -4. finally, _abs(D) >= T and abs(D) >= 5%_ — there are statistically significant changes of significant magnitude. 
- -The most interesting cases are the unstable queries _(3)_. When the elapsed time changes significantly between runs even on the same version of server, it means we won't be able to detect the changes of performance, because they are going to be drowned out by the noise. Such queries tend to be the most difficult to debug, because there is no straightforward way to compare "good" and "bad" server. This topic deserves its own article which we will publish next. For now, let's consider the happy path _(4)_. This is the case of real and notable changes in performance that this system is intended to catch. What do we do next? - -# Understanding the Reasons Behind the Changes - -An investigation of code performance often starts with applying a profiler. On Linux, you would use `perf`, a sampling profiler that periodically collects the stack trace of the process, so that you can then see an aggregate picture of where your program spends the most time. In ClickHouse, we actually have a built-in sampling profiler that saves results into a system table, so no external tools are needed. It can be enabled for all queries or for a particular one, by passing the settings [as described in the docs](/docs/en/operations/optimizing-performance/sampling-query-profiler/). It is on by default, so if you use a recent version of ClickHouse, you already have a combined profile of your production server load. To visualize it, we can use a well-known script for building [flamegraphs](https://github.com/brendangregg/FlameGraph): -``` -clickhouse-client -q "SELECT - arrayStringConcat( - arrayMap( - x -> concat(splitByChar('/', addressToLine(x))[-1], - '#', demangle(addressToSymbol(x))), - trace), - ';') AS stack, - count(*) AS samples -FROM system.trace_log -WHERE trace_type = 'Real' -AND query_id = '4aac5305-b27f-4a5a-91c3-61c0cf52ec2a' -GROUP BY trace" \ -| flamegraph.pl - -``` - -As an example, let's use the test run we've seen above.
The tested [pull request](https://github.com/ClickHouse/ClickHouse/pull/26248) is supposed to speed up the `sum` aggregate function for nullable integer types. Let's look at the query #8 of the test 'sum': `SELECT sum(toNullable(number)) FROM numbers(100000000)`. The test system reported that its performance increased by 38.5%, and built a "differential" variant of flamegraph for it, that shows the relative time spent in different functions. We can see that the function that calculates the sum, `DB::AggregateFunctionSumData::addManyNotNull`, now takes 15% less time. - - - -To get more leads into why the performance has changed, we can check how the various query metrics have changed between the old and the new servers. This includes all the metrics from `system.query_log.ProfileEvents`, such as `SelectedRows` or `RealTimeMicroseconds`. ClickHouse also tracks the hardware CPU metrics such as the number of branch or cache misses, using the Linux `perf_event_open` API. After downloading the test output archive, we can use a simple ad hoc [script](https://gist.github.com/akuzm/bb28a442f882349e0a5ec2b5262b97d0) to build some statistics and graphs of these metrics. - - - -This graph shows the number of branch instructions per second, on the old and the new server. We can see that the number of branch instructions has dramatically decreased, which might explain the performance difference. The tested pull request removes some `if`s and replaces them with multiplication, so this explanation sounds plausible. - -While side-to-side comparison is more robust against the systemic errors, the historical data is still very valuable for finding where a regression was introduced or investigating the unstable test queries. This is why we save the results of all test runs into a ClickHouse database. Let's consider the same query #8 from the `sum` test. 
We can build the history of performance changes with this [SQL query](https://play-ci.clickhouse.com/play?user=play#V0lUSCAwLjA1IEFTIHMKU0VMRUNUIG9sZF9zaGEsIG5ld19zaGEsIGV2ZW50X3RpbWUsIG1lc3NhZ2UsIG9sZF92YWx1ZSBBUyBgb2xkIHNlcnZlcmAsICAgbmV3X3ZhbHVlIEFTIGBuZXcgc2VydmVyYCwgYmVmb3JlIEFTIGBwcmV2IDExIHJ1bnNgLCBhZnRlciBBUyBgbmV4dCAxMSBydW5zYCwgICAgZGlmZiBBUyBgZGlmZiwgcmF0aW9gLCBzdGF0X3RocmVzaG9sZF9oaXN0b3JpY2FsIEFTIGBzdGF0IHRocmVzaG9sZCwgcmF0aW8sIGhpc3RvcmljYWxgLCBzdGF0X3RocmVzaG9sZCBBUyBgc3RhdCB0aHJlc2hvbGQsIHJhdGlvLCBwZXItcnVuYCwgY3B1X21vZGVsLHF1ZXJ5X2Rpc3BsYXlfbmFtZQpGUk9NIAooU0VMRUNUICosIHJ1bl9hdHRyaWJ1dGVzX3YxLnZhbHVlIEFTIGNwdV9tb2RlbCwKICAgICAgICBtZWRpYW4ob2xkX3ZhbHVlKSBPVkVSIChQQVJUSVRJT04gQlkgcnVuX2F0dHJpYnV0ZXNfdjEudmFsdWUsIHRlc3QsIHF1ZXJ5X2luZGV4LCBxdWVyeV9kaXNwbGF5X25hbWUgT1JERVIgQlkgZXZlbnRfZGF0ZSBBU0MgUk9XUyBCRVRXRUVOIDExIFBSRUNFRElORyBBTkQgQ1VSUkVOVCBST1cpIEFTIGJlZm9yZSwKICAgICAgICBtZWRpYW4obmV3X3ZhbHVlKSBPVkVSIChQQVJUSVRJT04gQlkgcnVuX2F0dHJpYnV0ZXNfdjEudmFsdWUsIHRlc3QsIHF1ZXJ5X2luZGV4LCBxdWVyeV9kaXNwbGF5X25hbWUgT1JERVIgQlkgZXZlbnRfZGF0ZSBBU0MgUk9XUyBCRVRXRUVOIENVUlJFTlQgUk9XIEFORCAxMSBGT0xMT1dJTkcpIEFTIGFmdGVyLAogICAgICAgIHF1YW50aWxlRXhhY3QoMC45NSkoYWJzKGRpZmYpKSBPVkVSIChQQVJUSVRJT04gQlkgcnVuX2F0dHJpYnV0ZXNfdjEudmFsdWUsIHRlc3QsIHF1ZXJ5X2luZGV4LCBxdWVyeV9kaXNwbGF5X25hbWUgT1JERVIgQlkgZXZlbnRfZGF0ZSBBU0MgUk9XUyBCRVRXRUVOIDM3IFBSRUNFRElORyBBTkQgQ1VSUkVOVCBST1cpIEFTIHN0YXRfdGhyZXNob2xkX2hpc3RvcmljYWwKICAgIEZST00gcGVyZnRlc3QucXVlcnlfbWV0cmljc192MgogICAgTEVGVCBKT0lOIHBlcmZ0ZXN0LnJ1bl9hdHRyaWJ1dGVzX3YxIFVTSU5HIChvbGRfc2hhLCBuZXdfc2hhKQogICAgV0hFUkUgKGF0dHJpYnV0ZSA9ICdsc2NwdS1tb2RlbC1uYW1lJykgQU5EIChtZXRyaWMgPSAnY2xpZW50X3RpbWUnKQogICAgICAgIC0tIG9ubHkgZm9yIGNvbW1pdHMgaW4gbWFzdGVyCiAgICAgICAgQU5EIChwcl9udW1iZXIgPSAwKQogICAgICAgIC0tIHNlbGVjdCB0aGUgcXVlcmllcyB3ZSBhcmUgaW50ZXJlc3RlZCBpbgogICAgICAgIEFORCAodGVzdCA9ICdzdW0nKSBBTkQgKHF1ZXJ5X2luZGV4ID0gOCkKKSBBUyB0CkFOWSBMRUZUIEpPSU4gYGdoLWRhdGFgLmNvbW1pdHMgT04gbmV3X3NoYSA9IHNoYQpXSEVSRQogICAgLS0gQ2hlY2sgZm9yIGEgcGVyc2lzdGVu
dCBhbmQgc2lnbmlmaWNhbnQgY2hhbmdlIGluIHF1ZXJ5IHJ1biB0aW1lLCBpbnRyb2R1Y2VkIGJ5IGEgY29tbWl0OgogICAgLS0gMSkgb24gYSBoaXN0b3JpY2FsIGdyYXBoIG9mIHF1ZXJ5IHJ1biB0aW1lLCB0aGVyZSBpcyBhIHN0ZXAgYmV0d2VlbiB0aGUgYWRqYWNlbnQgY29tbWl0cywKICAgIC0tIHRoYXQgaXMgaGlnaGVyIHRoYW4gdGhlIG5vcm1hbCB2YXJpYW5jZSwKICAgICgoKGFicyhhZnRlciAtIGJlZm9yZSkgLyBpZihhZnRlciA+IGJlZm9yZSwgYWZ0ZXIsIGJlZm9yZSkpIEFTIHN0ZXBfaGVpZ2h0KSA+PSBncmVhdGVzdChzLCBzdGF0X3RocmVzaG9sZF9oaXN0b3JpY2FsKSkKICAgIC0tIDIpIGluIHNpZGUtdG8tc2lkZSBjb21wYXJpc29uIG9mIHRoZXNlIHR3byBjb21taXRzLCB0aGVyZSB3YXMgYSBzdGF0aXN0aWNhbGx5IHNpZ25pZmljYW50IGRpZmZlcmVuY2UKICAgIC0tIHRoYXQgaXMgYWxzbyBoaWdoZXIgdGhhbiB0aGUgbm9ybWFsIHZhcmlhbmNlLAogICAgICAgIEFORCAoYWJzKGRpZmYpID49IGdyZWF0ZXN0KHN0YXRfdGhyZXNob2xkLCBzdGF0X3RocmVzaG9sZF9oaXN0b3JpY2FsLCBzKSkKICAgIC0tIDMpIGZpbmFsbHksIHRoaXMgc2lkZS10by1zaWRlIGRpZmZlcmVuY2UgaXMgb2YgbWFnbml0dWRlIGNvbXBhcmFibGUgdG8gdGhlIHN0ZXAgaW4gaGlzdG9yaWNhbCBncmFwaHMuCiAgICAgICAgQU5EIChhYnMoZGlmZikgPj0gKDAuNyAqIHN0ZXBfaGVpZ2h0KSkKb3JkZXIgYnkgZXZlbnRfdGltZSBkZXNjCmZvcm1hdCBWZXJ0aWNhbAoKCg==) to the live ClickHouse CI database. Open the link and run the query so that you can examine the query and see the result for yourself. There were three significant changes of performance throughout the test history. The most recent is a speedup in PR we started with. The second speedup is related to fully switching to clang 11. Curiously, there is also a small slowdown introduced by a PR that was supposed to speed it up instead. - -# Usability Considerations - -Regardless of how it works inside, a test system must be actually usable as a part of the development process. First and foremost, the false positive rate should be as low as possible. False positives are costly to investigate, and if they happen often, developers perceive the test as generally unreliable and tend to ignore the true positives as well. The test must also provide a concise report that makes it obvious what went wrong. We have not really succeeded in this. 
This test has many more failure modes than a plain functional test, and worse, some of these failures are quantitative, not binary. Much of the complexity is essential, and we try to alleviate it by providing good documentation and linking to the relevant parts of it right from the report page. Another important thing is that the user must be able to investigate a problematic query post-mortem, without running it again locally. This is why we try to export every metric and every intermediate result we have, in easily-manipulated plain text formats. - -Organizationally, it is hard to prevent devolving into a system that does a lot of busywork to just show a green check without giving any insight. I like to call this process "mining the green check", by analogy to cryptocurrencies. Our previous system did just that. It used increasingly complex heuristics tailored to each test query to prevent false positives, restarted itself many times if the results didn't look good, and so on. Ultimately, it wasted a lot of processing power without giving the real picture of the server performance. If you wanted to be sure that the performance did or did not change, you had to recheck by hand. This sorry state is the result of how the incentives are aligned around development — most of the time, the developers just want to merge their pull requests and not be bothered by some obscure test failures. Writing a good performance test query is also not always simple. Just any other query won't do — it has to give predictable performance, be not too fast and not too slow, actually measure something, and so on. After gathering more precise statistics, we discovered that several hundred of our test queries don't measure anything meaningful, e.g. they give a result that varies by 100% between runs. Another problem is that the performance often changes in statistically significant ways (true positive) with no relevant code changes (due to e.g. 
random differences in layout of the executable). Given all these difficulties, a working performance test system is bound to add noticeable friction to the development process. Most of the "obvious" ways to remove this friction ultimately boil down to "mining the green check". - -Implementation-wise, our system is peculiar in that it doesn't rely on well-known statistical packages, but instead heavily uses `clickhouse-local`, a tool that turns the ClickHouse SQL query processor into a command line utility. Doing all the computations in ClickHouse SQL helped us find bugs and usability problems with `clickhouse-local`. The performance test continues to work in dual purpose as a heavy SQL test, and sometimes catches newly introduced bugs in complex joins and the like. The query profiler is always on in the performance tests, and this finds bugs in our fork of `libunwind`. To run the test queries, we use a third-party [Python driver](https://github.com/mymarilyn/clickhouse-driver). This is the only use of this driver in our CI, and it also helped us find some bugs in native protocol handling. A not so honorable fact is that the scaffolding consists of an unreasonable amount of bash, but this at least served to convince us that running [shellcheck](https://github.com/koalaman/shellcheck) in CI is very helpful. - -This concludes the overview of the ClickHouse performance test system. Stay tuned for the next article where we will discuss the most problematic kind of a performance test failure — the unstable query run time. - -_2021-08-20 [Alexander Kuzmenkov](https://github.com/akuzm). Title photo by [Alexander Tokmakov](https://github.com/tavplubix)_ - -References: - -1. Box, Hunter, Hunter, 2005. Statistics for experimenters, p. 78: A Randomized Design Used in the Comparison of Standard and Modified Fertilizer Mixtures for Tomato Plants.
diff --git a/website/blog/en/2021/plausible-uses-clickHouse-to-power-privacy-friendly-google-analytics-alternative.md b/website/blog/en/2021/plausible-uses-clickHouse-to-power-privacy-friendly-google-analytics-alternative.md deleted file mode 100644 index f462f9b3990..00000000000 --- a/website/blog/en/2021/plausible-uses-clickHouse-to-power-privacy-friendly-google-analytics-alternative.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: 'Plausible Analytics uses ClickHouse to power their privacy-friendly Google Analytics alternative' -image: 'https://blog-images.clickhouse.com/en/2021/plausible-uses-clickHouse-to-power-privacy-friendly-google-analytics-alternative/featured-cropped.jpg' -date: '2021-12-08' -author: 'Elissa Weve' -tags: ['company'] ---- - -Plausible Analytics is a lightweight, open source web analytics tool that has quickly gained popularity as the privacy-friendly alternative to Google Analytics. By using Plausible Analytics, customers keep 100% ownership of their website data and protect the privacy of their visitors since there are no cookies and it is fully compliant with GDPR. - -Since its launch in April 2019, the analytics platform has scaled to service 5000+ paying subscribers. With an annual recurring revenue of half a million dollars, Plausible Analytics currently tracks 28,000 different websites and more than 1 billion page views per month. - -Marko Saric, co-founder at Plausible Analytics, said to handle this increase in volume, it became clear early on that the original architecture using Postgres to store analytics data could not handle the platform’s future growth. - -“We knew that if we’re going to go anywhere in the future we needed something better,” Saric said. - -## “Best technical decision we ever made” - -Through word of mouth, the Plausible team received the recommendation to try ClickHouse. They quickly noticed significant improvements in the loading speed of their dashboards. 
With Postgres, their dashboards were taking 5 seconds to load; now, with ClickHouse, they load in less than a second. - -Plausible co-founder Uku Täht said the team also tried a couple of other solutions, but “Clickhouse came on top in terms of both performance and features that we would make use of,” he said. - -“Plausible Analytics is a lightweight product, so it is important that everything loads quickly—the dashboard, segmentation of the data, and all the cool stuff in the background. Customers don’t know what we’re doing in the background, but they know that they want a fast experience,” Saric added. - -Using ClickHouse, Plausible Analytics is able to serve even its largest customers with ease, including the biggest customer, with 150 million pages per month. “This would not have been possible previously, it would have crashed everything, it would not have been able to load,” Saric said. “There would have been no chance we could have had that kind of customer.” - -According to Täht, switching to ClickHouse was the best technical decision their team ever made. “Clickhouse is amazingly efficient, not just in terms of compute power needed but also the time that it saves us. It's very easy to work with Clickhouse. It does exactly what we need and it does it exceptionally well. It's one of those technologies that feels really simple to use but also has a rich feature set.” - -“I don’t think we would be able to be where we are today without ClickHouse,” Saric said. 
“Without switching from Postgres, Plausible would not have all this growth and new customers.” - -## About Plausible - -Plausible Analytics is an open-source project dedicated to making web analytics more privacy-friendly. Our mission is to reduce corporate surveillance by providing an alternative web analytics tool which doesn’t come from the AdTech world. - -Visit [plausible.io](https://plausible.io/) for more information or to start a free trial. - - diff --git a/website/blog/en/2021/reading-from-external-memory.md b/website/blog/en/2021/reading-from-external-memory.md deleted file mode 100644 index 01d35d19018..00000000000 --- a/website/blog/en/2021/reading-from-external-memory.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: 'A journey to io_uring, AIO and modern storage devices' -image: 'https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/all-single-read.png' -date: '2021-03-09' -author: 'Ruslan Savchenko' -tags: ['Linux', 'benchmark', 'experiment'] ---- - -*While main memory is considered to be rather cheap by some systems designers it is not always possible to store everything in the main memory. When data is stored in external memory one has to think carefully how to access the data. There are several kind of storage devices and more than one system call to read from them. We performed experiments to find out how different Linux system calls perform for available devices. In total HDD, SATA SSD, NVMe SSD, and Intel Optane were accessed via single-threaded and multi-threaded pread, Linux aio, and new io_uring interfaces. Full report is available in PDF format:* [link](https://arxiv.org/pdf/2102.11198). *We give one section from the report as an example.* - -# Single Random Read - -External memory devices are block devices which means data transfer between a device and a host is done in blocks rather than single bytes. Typically 512 bytes or 4 kilobytes blocks are used. 
These block sizes were chosen by manufacturers a long time ago and may not be the best choice for modern devices. By requesting a larger amount of contiguous data we can emulate a larger block size. Let's find out how modern devices perform with larger blocks. - -Our goal is to pick the best block size for a random read. An application (or filesystem) can pick any block size and access data with respect to this block size. We vary block size from 4 kilobytes up to 32 megabytes. For each block size we make some random reads. Among these reads we calculate average, minimum and maximum latency as well as 99.0 and 99.9 percentiles. We use the pread(2) system call in this experiment. We believe that lseek(2) followed by read(2) should have the same performance since the observed storage access time is far longer than a system call. - -## Hard Disk Drive - -This figure shows results for HDD. - -![HDD single read latency](https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/hdd-single-read.png) - -The latency is almost the same for all block sizes smaller than 256 kilobytes. This happens because seek time is much larger than the data transfer time. The seek time includes arm positioning to find the right track and waiting for platter rotation to bring data under the head. A simple consequence is that for an HDD random read one should use blocks of size at least 256 kilobytes. Even if an application uses smaller blocks the drive access time would be the same. However one could still decide to use smaller blocks for better cache utilization: if the amount of data per request is small and is expected to fit in cache then storing a large block along with the requested data would actually make cache capacity smaller in terms of useful data. - -The 256 kilobyte block read takes 12 milliseconds on the average. We experienced variations from 4 milliseconds up to 25 milliseconds. This is really a huge amount of time for a computer. 
For example the typical process scheduling quantum is just a few milliseconds. An operating system can (and in fact does) execute other processes while our process waits for the data to arrive from the hard drive. - -## SATA SSD - -The figure below shows SATA SSD read latencies. - -![SATA SSD single read latency](https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/ssd-single-read.png) - -Note that the time at the lower part of the figure is in microseconds (we use standard abbreviations ms for milliseconds and us for microseconds). Reading a block of size 4 kilobytes takes 140 microseconds on the average and the time growth is linear when the block size increases. Compared to HDD reading a 4 kilobyte block from SSD is 80 times faster. For a 256 kilobyte block SSD is ten times faster than HDD. When block size is large enough (starting from 4 megabytes) SSD is only two times faster than HDD. - -## NVMe SSD - -The next figure shows results for NVMe SSD. - -![NVMe SSD single read latency](https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/nvme-single-read.png) - -The latency is better than that for SATA SSD. For a 4 kilobytes block size the average time improved only a little, but the 99 percentile is two times lower. It takes less than a millisecond to read a megabyte block from NVMe SSD. For SATA SSD it took 3 milliseconds. As we see, upgrade from SATA SSD to NVMe SSD is not as dramatic as upgrade from HDD to SATA SSD. This is not surprising since both SATA and NVMe SSD are based on the same technology. Only interfaces differ. - -## Intel Optane - -This figure shows results for Intel Optane SSD. - -![Intel Optane single read latency](https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/optane-single-read.png) - -Minimal latency is 12 microseconds which is 10 times lower than that of NVMe SSD. Average latency is 1000 times lower than that of HDD. 
There is quite a large variation for small block read latency: even though the average time is quite low and close to minimal latency the maximum latency and even the 99 percentile are significantly worse. If somebody looks at these results and wishes to create an Intel Optane-based service with 12 microsecond latency for reads they would have to install a larger number of Intel Optane drives or consider providing more realistic timings. - -When latency is so small overheads of context switching and interrupt handling become noticeable. One can use polling mode to gain some improvement. In this mode the Linux kernel monitors the completion queue instead of switching to some other job and relying on a hardware interrupt with an interrupt handler to notify about completion. Clearly, it is reasonable to use the polling mode only when hardware response is expected to arrive fast enough. - -![Intel Optane single read latency in polling mode](https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/optane-single-hipri-read.png) - -The figure above shows results for reading from Intel Optane in polling mode. The polling mode is used when an application calls the preadv2(2) system call with the RWF\_HIPRI flag. Compared to usual pread(2) the polling mode lowers the maximum latency by a factor of two for block sizes up to 256 kilobytes. - -## Summary - -To summarize our results the next figure shows single read latencies for all four storage types on a single chart. - -![Single read latency for Optane, SSD and HDD](https://blog-images.clickhouse.com/en/2021/reading-from-external-memory/all-single-read.png) - -Starting from 4 megabytes the latency is easily predicted by linear extrapolation so we don't show larger blocks here. To show everything on a single figure we are forced to use quite an overloaded legend. We use vertical level to show the latency and we iterate the block size horizontally. 
For each block size we show four bars, from left to right: for Intel Optane, NVMe SSD, SATA SSD, and HDD. Storage type is represented by hatch and the latency by color. - -We see that solid state device latencies are far better than HDD. For a single read the leader is Intel Optane, however as we shall see later it has it's own drawback compared to NVMe SSD. NVMe SSD and SATA SSD look quite close to each other when the block size is small. Our observations show that the best block size for random read is 256 kilobytes for HDD, 4 kilobytes for NVMe and SATA SSD and 8 kilobytes for Intel Optane. - -So, how about testing modern IO interfaces in Linux? Continue reading the [full article](https://arxiv.org/pdf/2102.11198). - -2021-03-09 [Ruslan Savchenko](https://github.com/savrus) diff --git a/website/blog/en/2021/tests-visualization.md b/website/blog/en/2021/tests-visualization.md deleted file mode 100644 index 8b927f8976a..00000000000 --- a/website/blog/en/2021/tests-visualization.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: 'Decorating a Christmas Tree With the Help Of Flaky Tests' -image: 'https://blog-images.clickhouse.com/en/2021/tests-visualization/tests.png' -date: '2021-12-27' -author: 'Alexey Milovidov' -tags: ['tests', 'ci', 'flaky', 'christmas', 'visualization'] ---- - -Test suites and testing infrastructure are one of the main assets of ClickHouse. We have tons of functional, integration, unit, performance, stress and fuzz tests. Tests are run on a per commit basis and results are publicly available. - -We also save the results of all test runs into the database in ClickHouse. We started collecting results in June 2020, and we have 1 777 608 240 records so far. Now we run around 5 to 9 million tests every day. - -Tests are good (in general). A good test suite allows for fast development iterations, stable releases, and to accept more contributions from the community. We love tests. If there's something strange in ClickHouse, what are we gonna do? 
Write more tests. - -Some tests can be flaky. The reasons for flakiness are uncountable - most of them are simple timing issues in the test script itself, but sometimes if a test has failed one of a thousand times it can uncover subtle logic errors in code. - -The problem is how to deal with flaky tests. Some people suggest automatically muting the "annoying" flaky tests. Or adding automatic retries in case of failure. We believe that this is all wrong. Instead of trying to ignore flaky tests, we do the opposite: we put maximum effort into making the tests even more flaky! - -Our recipes for flaky tests: -— never mute or restart them; if the test failed once, always look and investigate the cause; -— randomize the environment for every test run so the test will have more possible reasons to fail; -— if new tests are added, run them 100 times and if at least one fails, do not merge the pull request; -— if new tests are added, use them as a corpus for fuzzing - it will uncover corner cases even if author did not write tests for them; -— [randomize thread scheduling](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ThreadFuzzer.h) and add random sleeps and switching between CPU cores at random places and before and after mutex locks/unlocks; -— run everything in parallel on slow machines; - -Key point: to prevent flaky tests, we make our tests as flaky as possible. - -## Nice Way To Visualize Flaky Tests - -There is a test suite named "[functional stateless tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/0_stateless)" that has 3772 tests. For every day since 2020-06-13 (561 days) and every test (3772 tests), I drew a picture of size 561x3772 where a pixel is green if all test runs finished successfully in the master branch during this day (for all commits and all combinations: release, debug+assertions, ASan, MSan, TSan, UBSan), and a pixel is red if at least one run failed. 
The pixel will be transparent if the test did not exist that day. - -This visualization is a toy that I've made for fun: - -![Visualization](https://blog-images.clickhouse.com/en/2021/tests-visualization/tree_half.png) - -It looks like a Christmas Tree (you need a bit of imagination). If you have a different kind of imagination, you can see it as a green field with flowers. - -The time is from left to right. The tests are numbered with non-unique numbers (new tests usually get larger numbers), and these numbers are on the vertical axis (newer tests on top). - -If you see red dots in a horizontal line - it is a flaky test. If you see red dots in a vertical line - it means that one day we accidentally broke the master branch. If you see black horizontal lines or cuts in the tree - it means that the tests were added with some old numbers, most likely because some long living feature branch was merged. If you see black vertical lines - it means that some days tests were not run. - -The velocity of adding new tests is represented by how tall and narrow the Christmas tree is. When we add a large number of tests, the tree grows with almost vertical slope. - -The image is prepared by [HTML page](https://github.com/ClickHouse/ClickHouse/pull/33185) with some JavaScript that is querying a ClickHouse database directly and writing to a canvas. It took around ten seconds to build this picture. I also prepared an [interactive version](https://blog-images.clickhouse.com/en/2021/tests-visualization/demo.html) with already-saved data where you can play and find your favorite tests. 
diff --git a/website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md b/website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md deleted file mode 100644 index 45a4628ed3a..00000000000 --- a/website/blog/en/2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md +++ /dev/null @@ -1,133 +0,0 @@ ---- -title: 'Admixer Aggregates Over 1 Billion Unique Users a Day using ClickHouse' -image: 'https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/featured.jpg' -date: '2022-01-11' -author: 'Vladimir Zakrevsky' -tags: ['company'] ---- - -## Highlights - -* Inserting around 100 billion records per day, over 1 million records per second -* Able to aggregate over 1 billion unique users a day -* Moved from MSSQL to Azure Table Storage to ClickHouse -* ClickHouse is deployed on 15 servers with 2 TB total RAM - -Admixer is an Ad-Tech company that provides all the components to build infrastructure for advertising products for brands, ad agencies, media houses, publishers, ad networks, and other buy- and sell-side industry players looking for effective ad management. A distinctive feature of Admixer is their technology, which allows: - -* Agencies to place advertising campaigns with specified execution conditions (terms, budget, creative display settings) -* Set the rules for distributing advertising campaign budgets among thousands of publishers -* Provide accounts for publishers, where they could not only see income statistics or withdraw money but also create their advertising campaigns, as well as connect other sources of monetization in addition to Network advertising campaigns. 
- -Admixers products include: - -* SSP - Supply-side platform where publishers/websites offer advertising space -* DSP - Demand-side platform where advertisers buy advertising space -* ADX - Ad exchange (connects SSPs and DSPs - buyers and sellers of advertisements and advertising space) -* DMP - Data management platform (used by advertisers to configure the audience they want to target) - - Admixer provides not only access to these products but allows customers to build an entire ecosystem. - -## Why We Chose ClickHouse - -To implement the previous point, Admixer began developing an Advertising Exchange. Initially, AdExchange was based on the sale of local inventory by external DSPs. Then it began to aggregate the traffic of external SSPs to place local advertisements on it and later redirect this traffic to external DSPs. Thus, ADX was created. - -In 2015-2016, the share of external inventory was 3% (100 million requests), then at the end of 2016, it was more than 90% (3 billion requests). With a sharp increase in requests, the load on their processing increased, and most importantly, the load on the storage and provision of online analytics increased. Relational databases could not handle that many inserts for statistics records. Before migrating to Azure, we used a MSSQL server which stored the object structure and statistics. - -In 2011, when migrating to Azure, we used Azure Table Storage to store and issue statistics. But with an increase in the number of transactions and the amount of data, it was not optimal to use this solution since Azure Table Storage charges for the number of transactions and the amount of data. 
- -Thus we needed to: - -* Display statistics on advertising transactions in the user interface in real-time; -* Accept a significant amount (1 million records per second) of data for insertion; -* Aggregate the received data for different sections (40 operations and the same number of metrics); -* Be able to scale the data warehouse as the number of requests grew; -* Have full control over our costs. - -![Profile Report](https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/profile-report.png) - -This image shows the Profile Report. Any Ad Campaign in Admixer is split by Line Items (Profiles). It is possible to view detailed reports for each Profile including Date-Time Statistics, Geo, Domains, SSPs. This report is also updated in real time. - -## The Advantages of Using ClickHouse - -ClickHouse helps to cope with the challenges above and provides the following benefits: - -* Not tied to the platform (we decided to migrate from the cloud); -* The cluster we built allows us to receive up to a million inserts per second (and we know how to scale up on demand); -* Has built-in mechanisms for aggregating and distributing data across tables (materialized views); -* Excellent data compression; -* Reading speed makes it possible to display statistics directly in the user interface in real-time; -* Has a SQL dialect that provides the ability to build any reports; -* Has several advanced functions (and allows you to write your own) for processing statistics; -* Built-in HyperLogLog for storing rough data; -* Data sampling; -* Open source / community / good documentation; -* Constant additions of new features, bug fixes, and improvements to the current functionality; -* Convenient operations. - -## ClickHouse Architecture - -Our architecture changed from 2016 to 2020. There are two diagrams below: the state we started from and the state we arrived at. 
- -![Architecture 2016](https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/architecture-2016.png) - -_Architecture 2016_ - -![Architecture 2020](https://blog-images.clickhouse.com/en/2022/a-mixer-case-study/architecture-2020.png) - -_Architecture 2020_ - -Requests Handler is a component that accepts a request for an advertisement and determines which banner to display. After the banner is selected, it records this in the statistics. Since 2020, these components have been receiving over 1 million requests per second. Statistics were recorded through an intermediate element named Global Events Queue. Events were retrieved from GlobalEventsQueue, read by the EventsProcessor components, and additionally validated/enriched, then written to the ClickHouse cluster. - -Initially, we wrote from EventsProcessor in ClickHouse into several tables in parallel but then switched through Buffer-> Null-table-> MatViews. We will next investigate if the new [asynchronous insert feature](https://clickhouse.com/blog/en/2021/clickhouse-v21.11-released/#async-inserts) in version 21.11 would be an alternative to using a buffer table. - -We also reviewed the implementation of the event queue. Initially, we used Redis (but Redis is InMemory storage), thus: - -* On server restart, there was a risk of losing events; -* The amount of RAM is relatively small, and if we planned to stop the Events Processor or ClickHouse, there was a risk of overflowing the event queue, so a very high response rate to event processor problems was required. - -We tried to replace Redis and use Kafka instead, but the Kafka driver for ClickHouse at the time had issues with arrays (which has since been fixed). - -Therefore, we implemented our event queue, which was stored on the disk of each EventHandler component, and the local EventsProcessor was located on the same server. 
The number of EventsProcessor components has increased, which means that the number of insert requests in ClickHouse has also increased, but this was not a problem. - -Since financial optimization was also an essential factor for us, this scheme proved to be excellent in this regard as well. To receive processing and storage of data from ADX, we assembled a cluster with 15 servers (40 threads, 128 RAM, SSD storage), and we also took this with a margin. For the storage cluster for unique users, we used a cluster with 6 of the same servers. - -An important point was also the work with receiving data from clusters. If you recklessly send a request to the cluster, this could create a pretty significant load on it, leading to the slowdown of other processes. But ClickHouse has settings for limiting resources and allocating quotas for specific users, which allowed us to solve this case quickly. All configuration files can be perfectly placed in the configuration management system and managed from there. - -## ClickHouse Handles Over 1 Billion Unique Users Per Day - -In addition to statistics aggregation, which summed up metrics by dimension, Admixer provides information on how many unique users have watched ads for an arbitrary time. The number of uniques cannot be summed up. In our system, the user ID is the UUID. When we want to get several unique UUIDs for some arbitrary period, we need to recalculate the unique UUIDs for this period each time. We cannot decompose all possible combinations in advance since the intersection will be too large. - -Before using ClickHouse, we could count uniques only for predefined periods: day, week, month, all the time. Also, the number of slices was limited. Also, constant bulk requests for Aerospike slowed down the event processor. - -AggregatingMergeTree allowed us with minimal costs to count unique users by a large number of keys in one report. 
In the beginning, with a cluster from three servers, we could easily count 1 billion uniques per day in ~ 12 slices. There are nuances; large slices cannot be output to the interface since simultaneous scanning of large tables will take a lot of CPU time. The solution to this problem was the report generation service, which has its internal queue and sends the already generated CSV files to the interface. On the other hand, we can output small slices to the interface with a limited date range. - -ClickHouse was perfect as Big Data Storage for our ML models. - -## Advice To Others Who Might Be Considering ClickHouse - -The Devil is in the details! - -ClickHouse technical tips: - -* If you do not need high data accuracy, use HyperLogLog and sampling; -* Run load tests to determine the number of operations that your cluster can withstand given your data structure before assembling the cluster; -* Buffer is a great way to insert data, but watch out for memory; -* Use Native format for insert; -* Avoid large numbers of small parts for continuous flow insertion. Too many tables generate a lot of merges in the background such as the Too many parts (300) error; -* It is necessary to decide on the replication scheme at the beginning. One option is to use ZooKeeper and let tables replicate themselves using ReplicatedMergeTree and other replicating table engines. Because we had many tables and we wanted to choose what parts of the data to replicate to which servers we chose to not use ZooKeeper and have our client spread the writes - each write goes to two servers. - -Over the past five years, the Admixer's Core team has been working with a high-load and aggregation of big data. Any work has its subtleties, do not step on your rake. Use ours. - -We offer customers specialized audit, consultation, or create ready-made solutions using ClickHouse to solve high-load tasks. These speciality services are now offered via our new initiative [LoadFighters](https://loadfighters.com). 
- -### About Admixer - -Admixer is an independent adtech company that develops an ecosystem of full-stack programmatic solutions. Admixer has its own line of adtech products for brands, ad agencies, media houses, publishers, ad networks, and other buy- and sell-side industry players looking for effective ad management. Our customizable technology, in-depth expertise, and a personal approach help businesses turn programmatic advertising into a scalable revenue channel. - -Since their start in 2008, we’ve been on a mission to build an ecosystem with effective and transparent relationships between all of the players in the digital advertising industry. - -Today, the company has over 100 supply and demand partners, 3,000+ customers, and 200+ employees worldwide. They run offices in Ukraine, Belarus, Kazakhstan, Moldova, Georgia, and legal entities in the UK and Germany. - -For more information please visit: -[https://admixer.com/](https://admixer.com/) diff --git a/website/blog/en/2022/clickhouse-v22.1-released.md b/website/blog/en/2022/clickhouse-v22.1-released.md deleted file mode 100644 index 045d5367327..00000000000 --- a/website/blog/en/2022/clickhouse-v22.1-released.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: 'What''s New in ClickHouse 22.1' -image: 'https://blog-images.clickhouse.com/en/2022/clickhouse-v22-1/featured.jpg' -date: '2022-01-26' -author: 'Alexey Milovidov' -tags: ['company', 'community'] ---- - -22.1 is our first release in the new year. It includes 2,599 new commits from 133 contributors, including 44 new contributors: - -> 13DaGGeR, Adri Fernandez, Alexey Gusev, Anselmo D. Adams, Antonio Andelic, Ben, Boris Kuschel, Christoph Wurm, Chun-Sheng, Li, Dao, DimaAmega, Dmitrii Mokhnatkin, Harry-Lee, Justin Hilliard, MaxTheHuman, Meena-Renganathan, Mojtaba Yaghoobzadeh, N. 
Kolotov, Niek, Orkhan Zeynalli, Rajkumar, Ryad ZENINE, Sergei Trifonov, Suzy Wang, TABLUM.IO, Vitaly Artemyev, Xin Wang, Yatian Xu, Youenn Lebras, dalei2019, fanzhou, gulige, lgbo-ustc, minhthucdao, mreddy017, msirm, olevino, peter279k, save-my-heart, tekeri, usurai, zhoubintao, 李扬. - -Don't forget to run `SELECT * FROM system.contributors` on your production server! - -Let's describe the most important new features in 22.1. - -## Schema Inference - -Let's look at the following query as an example: - -``` -SELECT * FROM url('https://datasets.clickhouse.com/github_events_v2.native.xz', Native, -$$ - file_time DateTime, - event_type Enum('CommitCommentEvent' = 1, 'CreateEvent' = 2, 'DeleteEvent' = 3, 'ForkEvent' = 4, - 'GollumEvent' = 5, 'IssueCommentEvent' = 6, 'IssuesEvent' = 7, 'MemberEvent' = 8, - 'PublicEvent' = 9, 'PullRequestEvent' = 10, 'PullRequestReviewCommentEvent' = 11, - 'PushEvent' = 12, 'ReleaseEvent' = 13, 'SponsorshipEvent' = 14, 'WatchEvent' = 15, - 'GistEvent' = 16, 'FollowEvent' = 17, 'DownloadEvent' = 18, 'PullRequestReviewEvent' = 19, - 'ForkApplyEvent' = 20, 'Event' = 21, 'TeamAddEvent' = 22), - actor_login LowCardinality(String), - repo_name LowCardinality(String), - created_at DateTime, - updated_at DateTime, - action Enum('none' = 0, 'created' = 1, 'added' = 2, 'edited' = 3, 'deleted' = 4, 'opened' = 5, 'closed' = 6, 'reopened' = 7, 'assigned' = 8, 'unassigned' = 9, - 'labeled' = 10, 'unlabeled' = 11, 'review_requested' = 12, 'review_request_removed' = 13, 'synchronize' = 14, 'started' = 15, 'published' = 16, 'update' = 17, 'create' = 18, 'fork' = 19, 'merged' = 20), - comment_id UInt64, - body String, - path String, - position Int32, - line Int32, - ref LowCardinality(String), - ref_type Enum('none' = 0, 'branch' = 1, 'tag' = 2, 'repository' = 3, 'unknown' = 4), - creator_user_login LowCardinality(String), - number UInt32, - title String, - labels Array(LowCardinality(String)), - state Enum('none' = 0, 'open' = 1, 'closed' = 2), - locked 
UInt8, - assignee LowCardinality(String), - assignees Array(LowCardinality(String)), - comments UInt32, - author_association Enum('NONE' = 0, 'CONTRIBUTOR' = 1, 'OWNER' = 2, 'COLLABORATOR' = 3, 'MEMBER' = 4, 'MANNEQUIN' = 5), - closed_at DateTime, - merged_at DateTime, - merge_commit_sha String, - requested_reviewers Array(LowCardinality(String)), - requested_teams Array(LowCardinality(String)), - head_ref LowCardinality(String), - head_sha String, - base_ref LowCardinality(String), - base_sha String, - merged UInt8, - mergeable UInt8, - rebaseable UInt8, - mergeable_state Enum('unknown' = 0, 'dirty' = 1, 'clean' = 2, 'unstable' = 3, 'draft' = 4), - merged_by LowCardinality(String), - review_comments UInt32, - maintainer_can_modify UInt8, - commits UInt32, - additions UInt32, - deletions UInt32, - changed_files UInt32, - diff_hunk String, - original_position UInt32, - commit_id String, - original_commit_id String, - push_size UInt32, - push_distinct_size UInt32, - member_login LowCardinality(String), - release_tag_name String, - release_name String, - review_state Enum('none' = 0, 'approved' = 1, 'changes_requested' = 2, 'commented' = 3, 'dismissed' = 4, 'pending' = 5) -$$) -``` - -In this query we are importing data with the `url` table function. Data is posted on an HTTP server in a `.native.xz` file. The most annoying part of this query is that we have to specify the data structure and the format of this file. - -In the new ClickHouse release 22.1 it becomes much easier: - -``` -SELECT * FROM url('https://datasets.clickhouse.com/github_events_v2.native.xz') -``` - -Cannot be more easy! How is that possible? - -Firstly, we detect the data format automatically from the file extension. Here it is `.native.xz`, so we know that the data is compressed by `xz` (LZMA2) compression and is represented in `Native` format. The `Native` format already contains all information about the types and names of the columns, and we just read and use it. 
- -It works for every format that contains information about the data types: `Native`, `Avro`, `Parquet`, `ORC`, `Arrow` as well as `CSVWithNamesAndTypes`, `TSVWithNamesAndTypes`. - -And it works for every table function that reads files: `s3`, `file`, `hdfs`, `url`, `s3Cluster`, `hdfsCluster`. - -A lot of magic happens under the hood. It does not require reading the whole file in memory. For example, Parquet format has metadata at the end of file. So, we read the header first to find where the metadata is located, then do a range request to read the metadata about columns and their types, then continue to read the requested columns. And if the file is small, it will be read with a single request. - -If you want to extract the structure from the file without data processing, the DESCRIBE query is available: - -``` -DESCRIBE url('https://datasets.clickhouse.com/github_events_v2.native.xz') -``` - -Data structure can be also automatically inferred from `JSONEachRow`, `CSV`, `TSV`, `CSVWithNames`, `TSVWithNames`, `MsgPack`, `Values` and `Regexp` formats. - -For `CSV`, either Float64 or String is inferred. For `JSONEachRow` the inference of array types is supported, including multidimensional arrays. Arrays of non-uniform types are mapped to Tuples. And objects are mapped to the `Map` data type. - -If a format does not have column names (like `CSV` without a header), the names `c1`, `c2`, ... are used. - -File format is detected from the file extension: `csv`, `tsv`, `native`, `parquet`, `pb`, `ndjson`, `orc`... For example, `.ndjson` file is recognized as `JSONEachRow` format and `.csv` is recognized as header-less `CSV` format in ClickHouse, and if you want `CSVWithNames` you can specify the format explicitly. - -We support "schema on demand" queries. 
For example, the autodetected data types for `TSV` format are Strings, but you can refine the types in your query with the `::` operator: - -``` -SELECT c1 AS domain, uniq(c2::UInt64), count() AS cnt - FROM file('hits.tsv') - GROUP BY domain ORDER BY cnt DESC LIMIT 10 -``` - -As a bonus, `LineAsString` and `RawBLOB` formats also get type inference. Try this query to see how I prefer to read my favorite website: - -``` -SELECT extractTextFromHTML(*) - FROM url('https://news.ycombinator.com/', LineAsString); -``` - -Schema autodetection also works while creating `Merge`, `Distributed` and `ReplicatedMegreTree` tables. When you create the first replica, you have to specify the table structure. But when creating all the subsequent replicas, you only need `CREATE TABLE hits -ENGINE = ReplicatedMegreTree(...)` without listing the columns - the definition will be copied from another replica. - -This feature is implemented by **Pavel Kruglov** with the inspiration of initial work by **Igor Baliuk** and with additions by **ZhongYuanKai**. - -## Realtime Resource Usage In clickhouse-client - -`clickhouse-client` is my favorite user interface for ClickHouse. It is an example of how friendly every command line application should be. - -Now it shows realtime CPU and memory usage for the query directly in the progress bar: - -![resource usage](https://blog-images.clickhouse.com/en/2022/clickhouse-v22-1/progress.png) - -For distributed queries, we show both total memory usage and max memory usage per host. - -This feature was made possible by implementation of distributed metrics forwarding by **Dmitry Novik**. I have added this small visualization to clickhouse-client, and now it is possible to add similar info in every client using native ClickHouse protocol. - -## Parallel Query Processing On Replicas - -ClickHouse is a distributed MPP DBMS. It can scale up to use all CPU cores on one server and scale out to use computation resources of multiple shards in a cluster. 
- -But each shard usually contains more than one replica. And by default ClickHouse is using the resources of only one replica on every shard. E.g. if you have a cluster of 6 servers with 3 shards and two replicas on each, a query will use just three servers instead of all six. - -There was an option to enable `max_parallel_replicas`, but that option required specifying a "sampling key", it was inconvenient to use and did not scale well. - -Now we have a setting to enable the new parallel processing algorithm: `allow_experimental_parallel_reading_from_replicas`. If it is enabled, replicas will *dynamically* select and distribute the work across them. - -It works perfectly even if replicas have lower or higher amounts of computation resources. And it gives a complete result even if some replicas are stale. - -This feature was implemented by **Nikita Mikhaylov** - -## Service Discovery - -When adding or removing nodes in a cluster, now you don't have to edit the config on every server. Just use automatic cluster and servers will register itself: - -``` -1 - - - - - - /clickhouse/discovery/auto_cluster - 1 - - - -``` - -There is no need to edit the config when adding new replicas! - -This feature was implemented by **Vladimir Cherkasov**. - -## Sparse Encoding For Columns - -If a column contains mostly zeros, we can encode it in sparse format -and automatically optimize calculations! - -It is a special column encoding, similar to `LowCardinality`, but it's completely transparent and works automatically. - -``` -CREATE TABLE test.hits ... -ENGINE = MergeTree ORDER BY ... -SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9 -``` - -It allows compressing data better and optimizes computations, because data in sparse columns will be processed directly in sparse format in memory. - -Sparse or full format is selected based on column statistics that is calculated on insert and updated on background merges. - -Developed by **Anton Popov**. 
- -We also want to make LowCardinality encoding automatic, stay tuned! - -## Diagnostic Tool For ClickHouse - -It is a gift from the Yandex Cloud team. They have a tool to collect a report about ClickHouse instances to provide all the needed information for support. They decided to contribute this tool to open-source! - -You can find the tool here: [utils/clickhouse-diagnostics](https://github.com/ClickHouse/ClickHouse/tree/master/ -utils/clickhouse-diagnostics) - -Developed by **Alexander Burmak**. - -## Integrations - -Plenty of new integrations were added in 22.1: - -Integration with **Hive** as a foreign table engine for SELECT queries, contributed by **Taiyang Li** and reviewed by **Ksenia Sumarokova**. - -Integration with **Azure Blob Storage** similar to S3, contributed by **Jakub Kuklis** and reviewed by **Ksenia Sumarokova**. - -Support for **hdfsCluster** table function similar to **s3Cluster**, contributed by **Zhichang Yu** and reviewed by **Nikita Mikhailov**. - -## Statistical Functions - -I hope you have always dreamed of calculating the Cramer's V and Theil's U coefficients in ClickHouse, because now we have these functions for you and you have to deal with it. - -``` -:) SELECT cramersV(URL, URLDomain) FROM test.hits - -0.98 - -:) SELECT cramersV(URLDomain, ResolutionWidth) FROM test.hits - -0.27 -``` - -It can calculate some sort of dependency between categorical (discrete) values. You can imagine it like this: there is a correlation function `corr` but it is only applicable for linear dependencies; there is a rank correlation function `rankCorr` but it is only applicable for ordered values. And now there are a few functions to calculate *something* for discrete values. - -Developers: **Artem Tsyganov**, **Ivan Belyaev**, **Alexey Milovidov**. - - -## ... 
And Many More - -Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 22.1 release and follow [the roadmap](https://github.com/ClickHouse/ClickHouse/issues/32513). diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md deleted file mode 100644 index d55b0e6bcf0..00000000000 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: 'ClickHouse 22.2 Released' -image: 'https://blog-images.clickhouse.com/en/2022/clickhouse-v22-2/featured.jpg' -date: '2022-02-23' -author: 'Alexey Milovidov' -tags: ['company', 'community'] ---- - -We prepared a new ClickHouse release 22.2, so it's nice if you have tried it on 2022-02-22. If not, you can try it today. This latest release includes 2,140 new commits from 118 contributors, including 41 new contributors: - -> Aaron Katz, Andre Marianiello, Andrew, Andrii Buriachevskyi, Brian Hunter, CoolT2, Federico Rodriguez, Filippov Denis, Gaurav Kumar, Geoff Genz, HarryLeeIBM, Heena Bansal, ILya Limarenko, Igor Nikonov, IlyaTsoi, Jake Liu, JaySon-Huang, Lemore, Leonid Krylov, Michail Safronov, Mikhail Fursov, Nikita, RogerYK, Roy Bellingan, Saad Ur Rahman, W, Yakov Olkhovskiy, alexeypavlenko, cnmade, grantovsky, hanqf-git, liuneng1994, mlkui, s-kat, tesw yew isal, vahid-sohrabloo, yakov-olkhovskiy, zhifeng, zkun, zxealous, 박동철. - -Let me tell you what is most interesting in 22.2... - -## Projections are production ready - -Projections allow you to have multiple data representations in the same table. For example, you can have data aggregations along with the raw data. There are no restrictions on which aggregate functions can be used - you can have count distinct, quantiles, or whatever you want. You can have data in multiple different sorting orders. ClickHouse will automatically select the most suitable projection for your query, so the query will be automatically optimized. 
- -Projections are somewhat similar to Materialized Views, which also allow you to have incremental aggregation and multiple sorting orders. But unlike Materialized Views, projections are updated atomically and consistently with the main table. The data for projections is being stored in the same "data parts" of the table and is being merged in the same way as the main data. - -The feature was developed by **Amos Bird**, a prominent ClickHouse contributor. The [prototype](https://github.com/ClickHouse/ClickHouse/pull/20202) has been available since Feb 2021, it has been merged in the main codebase by **Nikolai Kochetov** in May 2021 under experimental flag, and after 21 follow-up pull requests we ensured that it passed the full set of test suites and enabled it by default. - -Read an example of how to optimize queries with projections [in our docs](https://clickhouse.com/docs/en/getting-started/example-datasets/uk-price-paid/#speedup-with-projections). - -## Control of file creation and rewriting on data export - -When you export your data with an `INSERT INTO TABLE FUNCTION` statement into `file`, `s3` or `hdfs` and the target file already exists, you can now control how to deal with it: you can append new data into the file if it is possible, rewrite it with new data, or create another file with a similar name like 'data.1.parquet.gz'. - -Some storage systems like `s3` and some formats like `Parquet` don't support data appending. In previous ClickHouse versions, if you insert multiple times into a file with Parquet data format, you will end up with a file that is not recognized by other systems. Now you can choose between throwing exceptions on subsequent inserts or creating more files. - -So, new settings were introduced: `s3_truncate_on_insert`, `s3_create_new_file_on_insert`, `hdfs_truncate_on_insert`, `hdfs_create_new_file_on_insert`, `engine_file_allow_create_multiple_files`. 
- -This feature [was developed](https://github.com/ClickHouse/ClickHouse/pull/33302) by **Pavel Kruglov**. - -## Custom deduplication token - -`ReplicatedMergeTree` and `MergeTree` types of tables implement block-level deduplication. When a block of data is inserted, its cryptographic hash is calculated and if the same block was already inserted before, then the duplicate is skipped and the insert query succeeds. This makes it possible to implement exactly-once semantics for inserts. - -In ClickHouse version 22.2 you can provide your own deduplication token instead of an automatically calculated hash. This makes sense if you already have batch identifiers from some other system and you want to reuse them. It also makes sense when blocks can be identical but they should actually be inserted multiple times. Or the opposite - when blocks contain some random data and you want to deduplicate only by significant columns. - -This is implemented by adding the setting `insert_deduplication_token`. The feature was contributed by **Igor Nikonov**. - -## DEFAULT keyword for INSERT - -A small addition for SQL compatibility - now we allow using the `DEFAULT` keyword instead of a value in `INSERT INTO ... VALUES` statement. It looks like this: - -`INSERT INTO test VALUES (1, 'Hello', DEFAULT)` - -Thanks to **Andrii Buriachevskyi** for this feature. - -## EPHEMERAL columns - -A column in a table can have a `DEFAULT` expression like `c INT DEFAULT a + b`. In ClickHouse you can also use `MATERIALIZED` instead of `DEFAULT` if you want the column to be always calculated with the provided expression instead of allowing a user to insert data. And you can use `ALIAS` if you don't want the column to be stored at all but instead to be calculated on the fly if referenced. - -Since version 22.2 a new type of column is added: `EPHEMERAL` column. The user can insert data into this column but the column is not stored in a table, it's ephemeral. 
The purpose of this column is to provide data to calculate other columns that can reference it with `DEFAULT` or `MATERIALIZED` expressions. - -This feature was made by **Yakov Olkhovskiy**. - -## Improvements for multi-disk configuration - -You can configure multiple disks to store ClickHouse data instead of managing RAID and ClickHouse will automatically manage the data placement. - -Since version 22.2 ClickHouse can automatically repair broken disks without server restart by downloading the missing parts from replicas and placing them on the healthy disks. - -This feature was implemented by **Amos Bird** and is already being used for more than 1.5 years in production at Kuaishou. - -Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS**. It allows replicas with non-uniform disk configuration and to have one replica to move old data to cold storage while another replica has all the data on hot storage. Data will be moved only on replicas that have the specified disk or volume, hence *if exists*. This was developed by **Anton Popov**. - -## Flexible memory limits - -We split per-query and per-user memory limits into a pair of hard and soft limits. The settings `max_memory_usage` and `max_memory_usage_for_user` act as hard limits. When memory consumption is approaching the hard limit, an exception will be thrown. Two other settings: `max_guaranteed_memory_usage` and `max_guaranteed_memory_usage_for_user` act as soft limits. - -A query will be allowed to use more memory than a soft limit if there is available memory. But if there will be memory shortage (relative to the per-user hard limit or total per-server memory consumption), we calculate the "overcommit ratio" - how much more memory every query is consuming relative to the soft limit - and we will kill the most overcommitted query to let other queries run. - -In short, your query will not be limited to a few gigabytes of RAM if you have hundreds of gigabytes available. 
- -This experimental feature was implemented by **Dmitry Novik** and is continuing to be developed. - -## Shell-style comments in SQL - -Now we allow comments starting with `# ` or `#!`, similar to MySQL. The variant with `#!` allows using shell scripts with "shebang" interpreted by `clickhouse-local`. - -This feature was contributed by **Aaron Katz**. Very nice. - - -## And many more... - -Maxim Kita, Danila Kutenin, Anton Popov, zhanglistar, Federico Rodriguez, Raúl Marín, Amos Bird and Alexey Milovidov have contributed a ton of performance optimizations for this release. We are obsessed with high performance, as usual. :) - -Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 22.2 release and follow [the roadmap](https://github.com/ClickHouse/ClickHouse/issues/32513). diff --git a/website/blog/en/2022/opensee-analyzing-terabytes-of-financial-data-a-day-with-clickhouse.md b/website/blog/en/2022/opensee-analyzing-terabytes-of-financial-data-a-day-with-clickhouse.md deleted file mode 100644 index 25d9fd3e965..00000000000 --- a/website/blog/en/2022/opensee-analyzing-terabytes-of-financial-data-a-day-with-clickhouse.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: 'Opensee: Analyzing Terabytes of Financial Data a Day With ClickHouse' -image: 'https://blog-images.clickhouse.com/en/2022/opensee/featured.png' -date: '2022-02-22' -author: 'Christophe Rivoire, Elena Bessis' -tags: ['company', 'community'] ---- - -We’d like to welcome Christophe Rivoire (UK Country Manager) and Elena Bessis (Product Marketing Assistant) from Opensee as guests to our blog. Today, they’re telling us how their product, powered by ClickHouse, allows financial institutions’ business users to directly harness 100% of their vast quantities of data instantly and on demand, with no size limitations. 
- -Opensee is a financial technology company providing real time self-service analytics solutions to financial institutions, which help them turn their big data challenges into a competitive advantage — unlocking vital opportunities led by business users. Opensee, formerly ICA, was started by a team of financial industry and technology experts frustrated that no simple big data analytics solution enabled them to dive deeper into all their data easily and efficiently, or perform what-if analysis on the hundreds of terabytes of data they were handling. - -So they built their own. - - -## ClickHouse For Trillions Of Financial Data Points - -Financial institutions have always been storing a lot of data (customer data, risk data, transaction data...) for their own decision processes and for regulatory reasons. Since the financial crisis, regulators all around the world have been significantly increasing the reporting requirements, insisting on longer historical ranges and deeper granularity. This combination has generated an exponential amount of data, which has forced financial institutions to review and upgrade their infrastructure. Opensee offers a solution to navigate all these very large data cubes, based on millions, billions or even trillions of data points. In order to build it, a data storage system capable of scaling horizontally with data and with fast OLAP query response time was required. In 2016, after thorough evaluation, Opensee concluded ClickHouse was the obvious solution. - -There are many use cases that involve storing and leveraging massive amounts of data on a daily basis, but Opensee built from the strength of their own expertise, evaluating risk linked to activities in the financial market. There are various types of risks (market risk, credit risk, liquidity risk…) and all of them need to aggregate a lot of data in order to calculate linear or non-linear indicators, both business and regulatory, and analyze all those numbers on the fly. 
- -!["Dashboard in Opensee for a Market Risk use case"](https://blog-images.clickhouse.com/en/2022/opensee/dashboard.png) -_Dashboard in Opensee for a Market Risk use case_ - - -## ClickHouse for Scalability, Granularity, Speed and Cost Control - -Financial institutions have sometimes believed that their ability to craft efficient storage solutions like data lakes for their vast amounts of data, typically built on a Hadoop stack, would make real-time analytics available. Unfortunately, many of these systems are too slow for at-scale analytics. - -Running a query on a Hadoop data lake is just not an option for users with real-time needs! Banks experimented with different types of analytical layers between the data lakes and the users, in order to allow access to their stored data and to run analytics, but ran into new challenges: in-memory computing solutions have a lack of scalability and high hardware costs. Others tried query accelerators but were forced to analyze only prepared data (pre-aggregated or specifically indexed data), losing the granularity which is always required to understand things like daily changes. More recently, financial institutions have been contemplating cloud database management systems, but for very large datasets and calculations the speed of these services is far from what ClickHouse can achieve for their specific use cases. - -Ultimately, none of these technologies could simultaneously combine scalability, granularity, speed and cost control, forcing financial institutions into a series of compromises. With Opensee, there is no need to compromise: the platform leverages ClickHouse's capacity to handle the huge volume that data lakes require and the fast response that in-memory databases can give, without the need to pre-aggregate the data. 
- - - -!["Dashboard in Opensee for a Market Risk use case"](https://blog-images.clickhouse.com/en/2022/opensee/pivot-table.png) -_Pivot table from the Opensee UI on a liquidity use case_ - - -## Opensee Architecture - -Opensee provides a series of APIs which allows users to fully abstract all the complexity and in particular the physical data model. These APIs are typically used for data ingestion, data query, model management, etc. Thanks to Opensee’s low-code API, users don’t need to access data through complex quasi-SQL queries, but rather through simple business queries that are optimized by Opensee to deliver performance. Opensee’s back end, which provides indirect access to Clickhouse, is written in Scala, while PostgreSQL contains all the configuration and context data that must be managed transactionally. Opensee also provides various options for front ends (dedicated Opensee web or rich user interface, Excel, others…) to interact with the data, navigate through the cube and leverage functionality like data versioning — built for the financial institution’s use. - - - -!["Dashboard in Opensee for a Market Risk use case"](https://blog-images.clickhouse.com/en/2022/opensee/architecture-chart.png) -_Opensee architecture chart_ - - -## Advantages of ClickHouse - -For Opensee, the most valuable feature is horizontal scalability, the capability to shard the data. Next comes the very fast dictionary lookup, rapid calculations with vectorization and the capability to manage array values. In the financial industry, where time series or historical data is everywhere, this capacity to calculate vectors and manage array values is critical. - -On top of being a solution that is extremely fast and efficient, other advantages include: - - -- distributed and replicated, with high availability and a performant map/reduce system -- wide range of features fit for analytics -- really good and extensive format support (csv, json, parquet, orc, protobuf ....) 
-- very rapid evolutions through the high contributions of a wide community to a very popular Open Source technology - -On top of these native ClickHouse strengths and functionalities, Opensee has developed a lot of other functionalities dedicated to financial institutions. To name only a few, a data versioning mechanism has been created allowing business users to either correct on the fly inaccurate data or simulate new values. This ‘What If’ simulation feature can be used to add, amend or delete transactions,with full auditability and traceability, without deleting any data. - -Another key feature is a Python processor which is available to define more complex calculations. Furthermore, the abstraction model layer has been built to remove the complexity of the physical data model for the users and optimize the queries. And, last but not least, in terms of visualization, a UI dedicated to financial institutions has been developed with and for its users. - - -## Dividing Hardware Costs By 10+ - -The cost efficiency factor is a key improvement for large financial institutions typically using in-memory computing technology. Dividing by ten (and sometimes more) the hardware cost is no small achievement! Being able to use very large datasets on standard servers on premise or in the cloud is a big achievement. With Opensee powered by ClickHouse, financial institutions are able to alleviate critical limitations of their existing solutions, avoiding legacy compromises and a lack of flexibility. Finally, these organizations are able to provide their users a turn-key solution to analyze all their data sets, which used to be siloed, in one single place, one single data model, one single infrastructure, and all of that in real time, combining very granular and very long historical ranges. - -## About Opensee - -Opensee empowers financial data divers to analyze deeper and faster. 
Headquartered in Paris, with offices in London and New York, Opensee is working with a trusted client base across global Tier 1 banks, asset managers, hedge funds and trading platforms. - -For more information please visit [www.opensee.io](http://www.opensee.io) or follow them on [LinkedIn](https://www.linkedin.com/company/opensee-company) and [Twitter](https://twitter.com/opensee_io). diff --git a/website/blog/en/index.md b/website/blog/en/index.md deleted file mode 100644 index 227a69408dc..00000000000 --- a/website/blog/en/index.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -is_index: true ---- diff --git a/website/blog/en/redirects.txt b/website/blog/en/redirects.txt deleted file mode 100644 index 2e4cd6e2dd4..00000000000 --- a/website/blog/en/redirects.txt +++ /dev/null @@ -1,33 +0,0 @@ -clickhouse-meetup-in-berlin-october-5-2017.md 2017/clickhouse-meetup-in-berlin-october-5-2017.md -clickhouse-meetup-at-berlin-october-5-2017.md 2017/clickhouse-meetup-in-berlin-october-5-2017.md -clickhouse-meetup-in-santa-clara-may-4-2017.md 2017/clickhouse-meetup-in-santa-clara-may-4-2017.md -clickhouse-meetup-at-santa-clara-may-4-2017.md 2017/clickhouse-meetup-in-santa-clara-may-4-2017.md -clickhouse-community-meetup-in-berlin-on-july-3.md 2018/announcing-clickhouse-community-meetup-in-berlin-on-july-3.md -evolution-of-data-structures-in-yandex-metrica.md 2016/evolution-of-data-structures-in-yandex-metrica.md -how-to-update-data-in-clickhouse.md 2016/how-to-update-data-in-clickhouse.md -yandex-opensources-clickhouse.md 2016/yandex-opensources-clickhouse.md -clickhouse-at-data-scale-2017.md 2017/clickhouse-at-data-scale-2017.md -clickhouse-meetup-in-berlin-october-5-2017.md 2017/clickhouse-meetup-in-berlin-october-5-2017.md -join-the-clickhouse-meetup-in-berlin.md 2017/join-the-clickhouse-meetup-in-berlin.md -clickhouse-at-percona-live-2017.md 2017/clickhouse-at-percona-live-2017.md -clickhouse-meetup-in-santa-clara-may-4-2017.md 2017/clickhouse-meetup-in-santa-clara-may-4-2017.md 
-announcing-clickhouse-meetup-in-amsterdam-on-november-15.md 2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md -clickhouse-community-meetup-in-berlin-on-july-3-2018.md 2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md -clickhouse-at-analysys-a10-2018.md 2018/clickhouse-at-analysys-a10-2018.md -clickhouse-community-meetup-in-berlin-on-july-3.md 2018/clickhouse-community-meetup-in-berlin-on-july-3.md -clickhouse-community-meetup-in-paris-on-october-2-2018.md 2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md -clickhouse-community-meetup-in-beijing-on-october-28-2018.md 2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md -clickhouse-meetup-in-amsterdam-on-november-15-2018.md 2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md -clickhouse-community-meetup-in-beijing-on-january-27-2018.md 2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md -clickhouse-at-percona-live-europe-2018.md 2018/clickhouse-at-percona-live-europe-2018.md -concept-cloud-mergetree-tables.md 2018/concept-cloud-mergetree-tables.md -clickhouse-meetup-in-limassol-on-may-7-2019.md 2019/clickhouse-meetup-in-limassol-on-may-7-2019.md -schedule-of-clickhouse-meetups-in-china-for-2019.md 2019/schedule-of-clickhouse-meetups-in-china-for-2019.md -clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md 2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md -clickhouse-meetup-in-san-francisco-on-june-4-2019.md 2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md -how-to-speed-up-lz4-decompression-in-clickhouse.md 2019/how-to-speed-up-lz4-decompression-in-clickhouse.md -clickhouse-at-percona-live-2019.md 2019/clickhouse-at-percona-live-2019.md -clickhouse-meetup-in-madrid-on-april-2-2019.md 2019/clickhouse-meetup-in-madrid-on-april-2-2019.md -clickhouse-meetup-in-beijing-on-june-8-2019.md 2019/clickhouse-meetup-in-beijing-on-june-8-2019.md 
-five-methods-for-database-obfuscation.md 2020/five-methods-for-database-obfuscation.md -2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md 2022/a-mixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md diff --git a/website/careers/index.html b/website/careers/index.html deleted file mode 100644 index 3505da3df94..00000000000 --- a/website/careers/index.html +++ /dev/null @@ -1,32 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block banner %} - -{% include "templates/global/banner.html" %} - -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% include "templates/careers/hero.html" %} -{% include "templates/careers/overview.html" %} -{% include "templates/careers/greenhouse.html" %} - -{% include "templates/global/newsletter.html" %} -{% include "templates/global/github_stars.html" %} - -{% endblock %} diff --git a/website/company/index.html b/website/company/index.html deleted file mode 100644 index 62bb0bdad23..00000000000 --- a/website/company/index.html +++ /dev/null @@ -1,35 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block banner %} - -{% include "templates/global/banner.html" %} - -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% include "templates/company/hero.html" %} -{% include "templates/company/overview.html" %} -{% include "templates/company/founders.html" %} -{% include "templates/company/team.html" %} -{% include "templates/company/press.html" %} -{% include "templates/company/contact.html" %} - -{% include "templates/global/newsletter.html" %} -{% include 
"templates/global/github_stars.html" %} - -{% endblock %} diff --git a/website/legal/trademark-policy/index.html b/website/legal/trademark-policy/index.html deleted file mode 100644 index 73da51b140a..00000000000 --- a/website/legal/trademark-policy/index.html +++ /dev/null @@ -1,31 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block banner %} - -{% include "templates/global/banner.html" %} - -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% include "templates/trademark-policy/hero.html" %} -{% include "templates/trademark-policy/content.html" %} - -{% include "templates/global/newsletter.html" %} -{% include "templates/global/github_stars.html" %} - -{% endblock %} diff --git a/website/locale/en/LC_MESSAGES/messages.mo b/website/locale/en/LC_MESSAGES/messages.mo deleted file mode 100644 index 370e2c731503853e004fc6df51418a54ea63c5ba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6243 zcmeH~-ESjT8HW#07McS60G84gak{L!(9MJ-AXu^=t&?~YFHRg{Z^HsY8qXX%hm7Z# zIWzI5LR_?W6|DfNRNSBn!3|dkfly0Zae)LC5(uG6fCNHZ0Obl$fl5$`=N-=`&LYGg zKqA|Z$LH()_`TAn3w{>f0l!-DS$HSUzktW#OYkl5rejKd8EW_s z_%y7+Z@o?_Ug{-2-U$B%8KR~zPS&{vO8W_TBfJ-ieeZ|!@I&wf7ogbl6&S&% z;4SbS41PPDfw#d`D0Y4v%6uaz>wgAbfRDhN;PX}c3sBcTPoE>B;RX~~lA$STtTD5-%@>1X9<5BE? 
z9*Tckh(hAA1I51wq4@PVDDylHMNdyciSJWT=6w$GQopbGcPQhJktEU6?NIV}uHqJy zJY`Vy^cWO9eI1IPo`#~Q4wHzUnj}}^`WO_uzXZk4uR&So8&&%=P~!FjDE9pX&cmNW z(bF-Ul(?LRV$T^UdTK+KRNsY?w;w@m4{o_#dbP`JZBPi_-O8jyt z`FR+M-H%l5UxX6RCm>U*C!y%++fe+t_pM_+orBWvqXb9BhfwUYa2h@gSKv3H=;>uB zdYXINI8P%edU~jO{yvnr{1}R!UV!4qFQMq^4^Zs=6BK{`Qt=fialVnHOWbaTqJz8O zDR>G>dke~Zm!Yb~vCXYZ!$e1JFfeKERV~YHlo-7k<~HrCn%~i3x~D%~7%yGDTC{z= z9Oj`~ENqhNUgidBX*;0a*Nn-F;bf0iuV*96;vMYBO{Qy6U5~@L%hJ7PR(SFb8W}tJ4a+oA;_r&$mq~!}s0N zBbO!d$i`+`cZ(d$%pR>p5~rtfO&EvHd)p;sA__O|p|?#k)UKx!C$_q*AEx#}lrp0? z5`)Xrx@VJw$%qt-547#+Jy+;aNDOkvycl4#OL=Gezl!ntQ0(y{H{qakK(y;HFB4^Y zm(EP4sZwC%Vxw7eSacKXw@E^g#*AQ?t|1gKKQpVFHZrL<%-{w?=2gosDkMLo!KJD` z2yHU{)UowZK?I~;>SyQhuRCP6R5F+>XEA;iHC z@g*+JBM-3-yAolW>s=d;J4wV$e79P46fJX1tTV10uNvdnZil<3EbxJou#v0&HrgS+ z`xQ@Gj6n{Wp@SpMcpT;WN=T5dPEK70J+_&N@?=kkxoX-SDr&zh?o(?;lG{L>krhOe zi76i^TXvCz1hnitEHbo&pd>euraJpLtJcGTQR`-OPulgvv-HSSi#vBhL)iyV4 z>zziuty`OVskOe`=rmgEJYCea^(*?rjrHYeZ73@AX|5s!Una{AhKV)tjOv6^brjhW zX+msXb&zk*$*v(vknHiZ^eBaOSaxM*wnI;;5zGP7x2Xy8EKGeu5aJoVh|-EQk;IP6 zYGk_J=EhHp0C6>SxsJ;cO-qILlD!$-k&-@`hVs0c>%{IFjNeb1>|0FbGPIMpwC~wo zuz_RbKtk9bn{sbJoG`CCmkxEd=`I2KZ}q}# zAu%iSxht6&No|a`#>0#L&v`gc$t%a8k%6K-a*aYnbwp8iZ*8RpeS=03&76*S_Y62pSt*lb;@FLm?-_02X_IJH@x ztihV^+c;P(`abBm1zlgOHJbO=mX|l{?e^40s}n4tFfl3Mo@5sE{OtT}F#CaE?t^-6 zeqnz0^z8d)XXzMhnq4bfUDfYOy|x*wwSwiEUTrO`HBNu5wO*fUa$yQORSCLmL0`Pw zZ0hyeT3w$zwDbkKru~A=bwQhS{~_pb7RoRBT(fz9VrNj-g{h6C$haF^|c(f4IQ_2_7E$A~{n@_Eb?=Xf~?k(uWu*)ekKXZl^j&7#=ho(NdnGTO`ru%o1 bqnqi`&GhJIdUP|*%YSK(Zl?bSH`9LudZ%4g diff --git a/website/locale/en/LC_MESSAGES/messages.po b/website/locale/en/LC_MESSAGES/messages.po deleted file mode 100644 index bc8c78f051c..00000000000 --- a/website/locale/en/LC_MESSAGES/messages.po +++ /dev/null @@ -1,379 +0,0 @@ -# English translations for PROJECT. -# Copyright (C) 2020 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 2020. 
-# -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-06-17 12:20+0300\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language: en\n" -"Language-Team: en \n" -"Plural-Forms: nplurals=2; plural=(n != 1)\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" - -#: templates/common_meta.html:1 -msgid "" -"ClickHouse is a fast open-source column-oriented database management " -"system that allows generating analytical data reports in real-time using " -"SQL queries" -msgstr "" -"ClickHouse is a fast open-source column-oriented database management " -"system that allows generating analytical data reports in real-time using " -"SQL queries" - -#: templates/common_meta.html:6 -msgid "ClickHouse - fast open-source OLAP DBMS" -msgstr "ClickHouse - fast open-source OLAP DBMS" - -#: templates/common_meta.html:10 -msgid "ClickHouse DBMS" -msgstr "ClickHouse DBMS" - -#: templates/common_meta.html:32 -msgid "open-source" -msgstr "open-source" - -#: templates/common_meta.html:32 -msgid "relational" -msgstr "relational" - -#: templates/common_meta.html:32 -msgid "analytics" -msgstr "analytics" - -#: templates/common_meta.html:32 -msgid "analytical" -msgstr "analytical" - -#: templates/common_meta.html:32 -msgid "Big Data" -msgstr "Big Data" - -#: templates/common_meta.html:32 -msgid "web-analytics" -msgstr "web-analytics" - -#: templates/footer.html:8 -msgid "ClickHouse source code is published under the Apache 2.0 License." -msgstr "ClickHouse source code is published under the Apache 2.0 License." - -#: templates/footer.html:8 -msgid "" -"Software is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR " -"CONDITIONS OF ANY KIND, either express or implied." 
-msgstr "" -"Software is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR " -"CONDITIONS OF ANY KIND, either express or implied." - -#: templates/footer.html:11 -msgid "Yandex LLC" -msgstr "Yandex LLC" - -#: templates/blog/content.html:20 templates/blog/content.html:25 -#: templates/blog/content.html:30 -msgid "Share on" -msgstr "Share on" - -#: templates/blog/content.html:37 -msgid "Published date" -msgstr "Published date" - -#: templates/blog/nav.html:20 -msgid "New post" -msgstr "New post" - -#: templates/blog/nav.html:25 -msgid "Documentation" -msgstr "Documentation" - -#: templates/docs/footer.html:3 -msgid "Rating" -msgstr "Rating" - -#: templates/docs/footer.html:3 -msgid "votes" -msgstr "votes" - -#: templates/docs/footer.html:4 -msgid "Article Rating" -msgstr "Article Rating" - -#: templates/docs/footer.html:4 -msgid "Was this content helpful?" -msgstr "Was this content helpful?" - -#: templates/docs/footer.html:7 -msgid "Unusable" -msgstr "Unusable" - -#: templates/docs/footer.html:7 -msgid "Poor" -msgstr "Poor" - -#: templates/docs/footer.html:7 -msgid "Good" -msgstr "Good" - -#: templates/docs/footer.html:7 -msgid "Excellent" -msgstr "Excellent" - -#: templates/docs/footer.html:8 -msgid "documentation" -msgstr "documentation" - -#: templates/docs/footer.html:15 -msgid "Built from" -msgstr "Built from" - -#: templates/docs/footer.html:15 -msgid "published on" -msgstr "published on" - -#: templates/docs/footer.html:15 -msgid "modified on" -msgstr "modified on" - -#: templates/docs/machine-translated.html:3 -msgid "Help wanted!" -msgstr "Help wanted!" - -#: templates/docs/machine-translated.html:4 -msgid "" -"The following content of this documentation page has been machine-" -"translated. But unlike other websites, it is not done on the fly. This " -"translated text lives on GitHub repository alongside main ClickHouse " -"codebase and waits for fellow native speakers to make it more human-" -"readable." 
-msgstr "" -"The following content of this documentation page has been machine-" -"translated. But unlike other websites, it is not done on the fly. This " -"translated text lives on GitHub repository alongside main ClickHouse " -"codebase and waits for fellow native speakers to make it more human-" -"readable." - -#: templates/docs/machine-translated.html:4 -msgid "You can also use the original English version as a reference." -msgstr "You can also use the original English version as a reference." - -#: templates/docs/machine-translated.html:7 -msgid "Help ClickHouse documentation by editing this page" -msgstr "Help ClickHouse documentation by editing this page" - -#: templates/docs/sidebar.html:3 -msgid "Multi-page or single-page" -msgstr "Multi-page or single-page" - -#: templates/docs/sidebar.html:5 -msgid "Multi-page version" -msgstr "Multi-page version" - -#: templates/docs/sidebar.html:8 -msgid "Single-page version" -msgstr "Single-page version" - -#: templates/docs/sidebar.html:13 -msgid "Version" -msgstr "Version" - -#: templates/docs/sidebar.html:13 templates/docs/sidebar.html:19 -msgid "latest" -msgstr "latest" - -#: templates/docs/sidebar.html:36 -msgid "PDF version" -msgstr "PDF version" - -#: templates/docs/toc.html:8 -msgid "Table of Contents" -msgstr "Table of Contents" - -#: templates/index/community.html:4 -msgid "ClickHouse community" -msgstr "ClickHouse community" - -#: templates/index/community.html:13 templates/index/community.html:14 -msgid "ClickHouse YouTube Channel" -msgstr "ClickHouse YouTube Channel" - -#: templates/index/community.html:25 templates/index/community.html:26 -msgid "ClickHouse Official Twitter Account" -msgstr "ClickHouse Official Twitter Account" - -#: templates/index/community.html:36 templates/index/community.html:37 -msgid "ClickHouse at Telegram" -msgstr "ClickHouse at Telegram" - -#: templates/index/community.html:41 -msgid "Chat with real users in " -msgstr "Chat with real users in " - -#: 
templates/index/community.html:44 templates/index/community.html:116 -msgid "English" -msgstr "English" - -#: templates/index/community.html:45 -msgid "or in" -msgstr "or in" - -#: templates/index/community.html:47 templates/index/community.html:117 -msgid "Russian" -msgstr "Russian" - -#: templates/index/community.html:65 -msgid "Open GitHub issue to ask for help or to file a feature request" -msgstr "Open GitHub issue to ask for help or to file a feature request" - -#: templates/index/community.html:76 templates/index/community.html:77 -msgid "ClickHouse Slack Workspace" -msgstr "ClickHouse Slack Workspace" - -#: templates/index/community.html:82 -msgid "Multipurpose public hangout" -msgstr "Multipurpose public hangout" - -#: templates/index/community.html:101 -msgid "Ask any questions" -msgstr "Ask any questions" - -#: templates/index/community.html:115 -msgid "ClickHouse Blog" -msgstr "ClickHouse Blog" - -#: templates/index/community.html:116 -msgid "in" -msgstr "in" - -#: templates/index/community.html:128 templates/index/community.html:129 -msgid "ClickHouse at Google Groups" -msgstr "ClickHouse at Google Groups" - -#: templates/index/community.html:133 -msgid "Email discussions" -msgstr "Email discussions" - -#: templates/index/community.html:142 -msgid "Like ClickHouse?" -msgstr "Like ClickHouse?" - -#: templates/index/community.html:143 -msgid "Help to spread the word about it via" -msgstr "Help to spread the word about it via" - -#: templates/index/community.html:144 -msgid "and" -msgstr "and" - -#: templates/index/community.html:153 -msgid "Hosting ClickHouse Meetups" -msgstr "Hosting ClickHouse Meetups" - -#: templates/index/community.html:157 -msgid "" -"ClickHouse meetups are essential for strengthening community worldwide, " -"but they couldn't be possible without the help of local organizers. " -"Please, fill this form if you want to become one or want to meet " -"ClickHouse core team for any other reason." 
-msgstr "" -"ClickHouse meetups are essential for strengthening community worldwide, " -"but they couldn't be possible without the help of local organizers. " -"Please, fill this form if you want to become one or want to meet " -"ClickHouse core team for any other reason." - -#: templates/index/community.html:159 -msgid "ClickHouse Meetup" -msgstr "ClickHouse Meetup" - -#: templates/index/community.html:165 -msgid "Name" -msgstr "Name" - -#: templates/index/community.html:168 -msgid "Email" -msgstr "Email" - -#: templates/index/community.html:171 -msgid "Company" -msgstr "Company" - -#: templates/index/community.html:174 -msgid "City" -msgstr "City" - -#: templates/index/community.html:179 -msgid "We'd like to host a public ClickHouse Meetup" -msgstr "We'd like to host a public ClickHouse Meetup" - -#: templates/index/community.html:185 -msgid "We'd like to invite ClickHouse team to our office" -msgstr "We'd like to invite ClickHouse team to our office" - -#: templates/index/community.html:191 -msgid "We'd like to invite ClickHouse team to another event we organize" -msgstr "We'd like to invite ClickHouse team to another event we organize" - -#: templates/index/community.html:197 -msgid "We're interested in commercial consulting, support or managed service" -msgstr "We're interested in commercial consulting, support or managed service" - -#: templates/index/community.html:201 -msgid "Additional comments" -msgstr "Additional comments" - -#: templates/index/community.html:203 -msgid "Send" -msgstr "Send" - -#: templates/index/community.html:212 -msgid "" -"If you have any more thoughts or questions, feel free to contact Yandex " -"ClickHouse team directly at" -msgstr "" -"If you have any more thoughts or questions, feel free to contact Yandex " -"ClickHouse team directly at" - -#: templates/index/community.html:213 -msgid "turn on JavaScript to see email address" -msgstr "turn on JavaScript to see email address" - -#~ msgid "" -#~ "ClickHouse is an open source 
column-" -#~ "oriented database management system that " -#~ "allows generating analytical data reports " -#~ "in real time using SQL queries." -#~ msgstr "" -#~ "ClickHouse is an open source column-" -#~ "oriented database management system that " -#~ "allows generating analytical data reports " -#~ "in real time using SQL queries." - -#~ msgid "" -#~ "ClickHouse is an open source distributed" -#~ " column-oriented database management system" -#~ " that allows generating analytical data " -#~ "reports in real time using SQL " -#~ "queries. Сreated by ClickHouse " -#~ "manages extremely large volumes of data" -#~ " in a stable and sustainable manner." -#~ msgstr "" -#~ "ClickHouse is an open source distributed" -#~ " column-oriented database management system" -#~ " that allows generating analytical data " -#~ "reports in real time using SQL " -#~ "queries. Сreated by ClickHouse " -#~ "manages extremely large volumes of data" -#~ " in a stable and sustainable manner." - -#~ msgid "published at" -#~ msgstr "published at" - -#~ msgid "modified at" -#~ msgstr "modified at" - diff --git a/website/locale/ja/LC_MESSAGES/messages.mo b/website/locale/ja/LC_MESSAGES/messages.mo deleted file mode 100644 index 2061fefa110cade4d6e2ec94d59862b90fd1b49e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 424 zcmaJ+%TB{E5G)cWA31aIfg5abf&`VTO7sy>P(!0M>fKGYA(d-Kwu5N@gWuy@a6nK_ zthCb3?r3KASy&iqkY(JVn!-uA*!i zFP=$;jW!MvTh%2@+R8$%IgOHWJii~$W?8gcl5~;7)Sz%)0cOG>ry&a&uv-Z3Xb|#{ zU9ua-JP|T{tvjcGd2?YMlZ@GJ*`lo@irxy3stqE@`hXS3m`|a z<5X#dB$cfZ5+1bKS-)z&pdAH6`aO;qN#Z0r{vY%inQG-w`2_pMD?5C1*Nqgl8q(rL s7>jPyx(y7Dye{bm4MeKYWvwbs2kY9A$Df=q*>gG(Ym_wX54=O~6HiZjvj6}9 diff --git a/website/locale/ja/LC_MESSAGES/messages.po b/website/locale/ja/LC_MESSAGES/messages.po deleted file mode 100644 index c7ad2b5ae85..00000000000 --- a/website/locale/ja/LC_MESSAGES/messages.po +++ /dev/null @@ -1,326 +0,0 @@ -# Translations template for PROJECT. 
-# Copyright (C) 2020 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# Automatically generated, 2020. -# -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-06-17 12:20+0300\n" -"PO-Revision-Date: 2020-06-17 12:20+0300\n" -"Last-Translator: Automatically generated\n" -"Language-Team: none\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" -"Language: ja\n" -"Plural-Forms: nplurals=1; plural=0;\n" - -#: templates/common_meta.html:1 -msgid "" -"ClickHouse is a fast open-source column-oriented database management system " -"that allows generating analytical data reports in real-time using SQL queries" -msgstr "" - -#: templates/common_meta.html:6 -msgid "ClickHouse - fast open-source OLAP DBMS" -msgstr "" - -#: templates/common_meta.html:10 -msgid "ClickHouse DBMS" -msgstr "" - -#: templates/common_meta.html:32 -msgid "open-source" -msgstr "" - -#: templates/common_meta.html:32 -msgid "relational" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytics" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytical" -msgstr "" - -#: templates/common_meta.html:32 -msgid "Big Data" -msgstr "" - -#: templates/common_meta.html:32 -msgid "web-analytics" -msgstr "" - -#: templates/footer.html:8 -msgid "ClickHouse source code is published under the Apache 2.0 License." -msgstr "" - -#: templates/footer.html:8 -msgid "" -"Software is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR " -"CONDITIONS OF ANY KIND, either express or implied." 
-msgstr "" - -#: templates/footer.html:11 -msgid "Yandex LLC" -msgstr "" - -#: templates/blog/content.html:20 templates/blog/content.html:25 -#: templates/blog/content.html:30 -msgid "Share on" -msgstr "" - -#: templates/blog/content.html:37 -msgid "Published date" -msgstr "" - -#: templates/blog/nav.html:20 -msgid "New post" -msgstr "" - -#: templates/blog/nav.html:25 -msgid "Documentation" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "Rating" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "votes" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Article Rating" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Was this content helpful?" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Unusable" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Poor" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Good" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Excellent" -msgstr "" - -#: templates/docs/footer.html:8 -msgid "documentation" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "Built from" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "published on" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "modified on" -msgstr "" - -#: templates/docs/machine-translated.html:3 -msgid "Help wanted!" -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "" -"The following content of this documentation page has been machine-" -"translated. But unlike other websites, it is not done on the fly. This " -"translated text lives on GitHub repository alongside main ClickHouse " -"codebase and waits for fellow native speakers to make it more human-readable." -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "You can also use the original English version as a reference." 
-msgstr "" - -#: templates/docs/machine-translated.html:7 -msgid "Help ClickHouse documentation by editing this page" -msgstr "" - -#: templates/docs/sidebar.html:3 -msgid "Multi-page or single-page" -msgstr "" - -#: templates/docs/sidebar.html:5 -msgid "Multi-page version" -msgstr "" - -#: templates/docs/sidebar.html:8 -msgid "Single-page version" -msgstr "" - -#: templates/docs/sidebar.html:13 -msgid "Version" -msgstr "" - -#: templates/docs/sidebar.html:13 templates/docs/sidebar.html:19 -msgid "latest" -msgstr "" - -#: templates/docs/sidebar.html:36 -msgid "PDF version" -msgstr "" - -#: templates/docs/toc.html:8 -msgid "Table of Contents" -msgstr "" - -#: templates/index/community.html:4 -msgid "ClickHouse community" -msgstr "" - -#: templates/index/community.html:13 templates/index/community.html:14 -msgid "ClickHouse YouTube Channel" -msgstr "" - -#: templates/index/community.html:25 templates/index/community.html:26 -msgid "ClickHouse Official Twitter Account" -msgstr "" - -#: templates/index/community.html:36 templates/index/community.html:37 -msgid "ClickHouse at Telegram" -msgstr "" - -#: templates/index/community.html:41 -msgid "Chat with real users in " -msgstr "" - -#: templates/index/community.html:44 templates/index/community.html:116 -msgid "English" -msgstr "" - -#: templates/index/community.html:45 -msgid "or in" -msgstr "" - -#: templates/index/community.html:47 templates/index/community.html:117 -msgid "Russian" -msgstr "" - -#: templates/index/community.html:65 -msgid "Open GitHub issue to ask for help or to file a feature request" -msgstr "" - -#: templates/index/community.html:76 templates/index/community.html:77 -msgid "ClickHouse Slack Workspace" -msgstr "" - -#: templates/index/community.html:82 -msgid "Multipurpose public hangout" -msgstr "" - -#: templates/index/community.html:101 -msgid "Ask any questions" -msgstr "" - -#: templates/index/community.html:115 -msgid "ClickHouse Blog" -msgstr "" - -#: templates/index/community.html:116 
-msgid "in" -msgstr "" - -#: templates/index/community.html:128 templates/index/community.html:129 -msgid "ClickHouse at Google Groups" -msgstr "" - -#: templates/index/community.html:133 -msgid "Email discussions" -msgstr "" - -#: templates/index/community.html:142 -msgid "Like ClickHouse?" -msgstr "" - -#: templates/index/community.html:143 -msgid "Help to spread the word about it via" -msgstr "" - -#: templates/index/community.html:144 -msgid "and" -msgstr "" - -#: templates/index/community.html:153 -msgid "Hosting ClickHouse Meetups" -msgstr "" - -#: templates/index/community.html:157 -msgid "" -"ClickHouse meetups are essential for strengthening community worldwide, but " -"they couldn't be possible without the help of local organizers. Please, fill " -"this form if you want to become one or want to meet ClickHouse core team for " -"any other reason." -msgstr "" - -#: templates/index/community.html:159 -msgid "ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:165 -msgid "Name" -msgstr "" - -#: templates/index/community.html:168 -msgid "Email" -msgstr "" - -#: templates/index/community.html:171 -msgid "Company" -msgstr "" - -#: templates/index/community.html:174 -msgid "City" -msgstr "" - -#: templates/index/community.html:179 -msgid "We'd like to host a public ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:185 -msgid "We'd like to invite ClickHouse team to our office" -msgstr "" - -#: templates/index/community.html:191 -msgid "We'd like to invite ClickHouse team to another event we organize" -msgstr "" - -#: templates/index/community.html:197 -msgid "We're interested in commercial consulting, support or managed service" -msgstr "" - -#: templates/index/community.html:201 -msgid "Additional comments" -msgstr "" - -#: templates/index/community.html:203 -msgid "Send" -msgstr "" - -#: templates/index/community.html:212 -msgid "" -"If you have any more thoughts or questions, feel free to contact Yandex " -"ClickHouse team 
directly at" -msgstr "" - -#: templates/index/community.html:213 -msgid "turn on JavaScript to see email address" -msgstr "" diff --git a/website/locale/messages.pot b/website/locale/messages.pot deleted file mode 100644 index cfbdf52553b..00000000000 --- a/website/locale/messages.pot +++ /dev/null @@ -1,328 +0,0 @@ -# Translations template for PROJECT. -# Copyright (C) 2020 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 2020. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-06-17 12:20+0300\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" - -#: templates/common_meta.html:1 -msgid "" -"ClickHouse is a fast open-source column-oriented database management " -"system that allows generating analytical data reports in real-time using " -"SQL queries" -msgstr "" - -#: templates/common_meta.html:6 -msgid "ClickHouse - fast open-source OLAP DBMS" -msgstr "" - -#: templates/common_meta.html:10 -msgid "ClickHouse DBMS" -msgstr "" - -#: templates/common_meta.html:32 -msgid "open-source" -msgstr "" - -#: templates/common_meta.html:32 -msgid "relational" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytics" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytical" -msgstr "" - -#: templates/common_meta.html:32 -msgid "Big Data" -msgstr "" - -#: templates/common_meta.html:32 -msgid "web-analytics" -msgstr "" - -#: templates/footer.html:8 -msgid "ClickHouse source code is published under the Apache 2.0 License." -msgstr "" - -#: templates/footer.html:8 -msgid "" -"Software is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR " -"CONDITIONS OF ANY KIND, either express or implied." 
-msgstr "" - -#: templates/footer.html:11 -msgid "Yandex LLC" -msgstr "" - -#: templates/blog/content.html:20 templates/blog/content.html:25 -#: templates/blog/content.html:30 -msgid "Share on" -msgstr "" - -#: templates/blog/content.html:37 -msgid "Published date" -msgstr "" - -#: templates/blog/nav.html:20 -msgid "New post" -msgstr "" - -#: templates/blog/nav.html:25 -msgid "Documentation" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "Rating" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "votes" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Article Rating" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Was this content helpful?" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Unusable" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Poor" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Good" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Excellent" -msgstr "" - -#: templates/docs/footer.html:8 -msgid "documentation" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "Built from" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "published on" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "modified on" -msgstr "" - -#: templates/docs/machine-translated.html:3 -msgid "Help wanted!" -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "" -"The following content of this documentation page has been machine-" -"translated. But unlike other websites, it is not done on the fly. This " -"translated text lives on GitHub repository alongside main ClickHouse " -"codebase and waits for fellow native speakers to make it more human-" -"readable." -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "You can also use the original English version as a reference." 
-msgstr "" - -#: templates/docs/machine-translated.html:7 -msgid "Help ClickHouse documentation by editing this page" -msgstr "" - -#: templates/docs/sidebar.html:3 -msgid "Multi-page or single-page" -msgstr "" - -#: templates/docs/sidebar.html:5 -msgid "Multi-page version" -msgstr "" - -#: templates/docs/sidebar.html:8 -msgid "Single-page version" -msgstr "" - -#: templates/docs/sidebar.html:13 -msgid "Version" -msgstr "" - -#: templates/docs/sidebar.html:13 templates/docs/sidebar.html:19 -msgid "latest" -msgstr "" - -#: templates/docs/sidebar.html:36 -msgid "PDF version" -msgstr "" - -#: templates/docs/toc.html:8 -msgid "Table of Contents" -msgstr "" - -#: templates/index/community.html:4 -msgid "ClickHouse community" -msgstr "" - -#: templates/index/community.html:13 templates/index/community.html:14 -msgid "ClickHouse YouTube Channel" -msgstr "" - -#: templates/index/community.html:25 templates/index/community.html:26 -msgid "ClickHouse Official Twitter Account" -msgstr "" - -#: templates/index/community.html:36 templates/index/community.html:37 -msgid "ClickHouse at Telegram" -msgstr "" - -#: templates/index/community.html:41 -msgid "Chat with real users in " -msgstr "" - -#: templates/index/community.html:44 templates/index/community.html:116 -msgid "English" -msgstr "" - -#: templates/index/community.html:45 -msgid "or in" -msgstr "" - -#: templates/index/community.html:47 templates/index/community.html:117 -msgid "Russian" -msgstr "" - -#: templates/index/community.html:65 -msgid "Open GitHub issue to ask for help or to file a feature request" -msgstr "" - -#: templates/index/community.html:76 templates/index/community.html:77 -msgid "ClickHouse Slack Workspace" -msgstr "" - -#: templates/index/community.html:82 -msgid "Multipurpose public hangout" -msgstr "" - -#: templates/index/community.html:101 -msgid "Ask any questions" -msgstr "" - -#: templates/index/community.html:115 -msgid "ClickHouse Blog" -msgstr "" - -#: templates/index/community.html:116 
-msgid "in" -msgstr "" - -#: templates/index/community.html:128 templates/index/community.html:129 -msgid "ClickHouse at Google Groups" -msgstr "" - -#: templates/index/community.html:133 -msgid "Email discussions" -msgstr "" - -#: templates/index/community.html:142 -msgid "Like ClickHouse?" -msgstr "" - -#: templates/index/community.html:143 -msgid "Help to spread the word about it via" -msgstr "" - -#: templates/index/community.html:144 -msgid "and" -msgstr "" - -#: templates/index/community.html:153 -msgid "Hosting ClickHouse Meetups" -msgstr "" - -#: templates/index/community.html:157 -msgid "" -"ClickHouse meetups are essential for strengthening community worldwide, " -"but they couldn't be possible without the help of local organizers. " -"Please, fill this form if you want to become one or want to meet " -"ClickHouse core team for any other reason." -msgstr "" - -#: templates/index/community.html:159 -msgid "ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:165 -msgid "Name" -msgstr "" - -#: templates/index/community.html:168 -msgid "Email" -msgstr "" - -#: templates/index/community.html:171 -msgid "Company" -msgstr "" - -#: templates/index/community.html:174 -msgid "City" -msgstr "" - -#: templates/index/community.html:179 -msgid "We'd like to host a public ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:185 -msgid "We'd like to invite ClickHouse team to our office" -msgstr "" - -#: templates/index/community.html:191 -msgid "We'd like to invite ClickHouse team to another event we organize" -msgstr "" - -#: templates/index/community.html:197 -msgid "We're interested in commercial consulting, support or managed service" -msgstr "" - -#: templates/index/community.html:201 -msgid "Additional comments" -msgstr "" - -#: templates/index/community.html:203 -msgid "Send" -msgstr "" - -#: templates/index/community.html:212 -msgid "" -"If you have any more thoughts or questions, feel free to contact Yandex " -"ClickHouse team 
directly at" -msgstr "" - -#: templates/index/community.html:213 -msgid "turn on JavaScript to see email address" -msgstr "" - diff --git a/website/locale/ru/LC_MESSAGES/messages.mo b/website/locale/ru/LC_MESSAGES/messages.mo deleted file mode 100644 index 0ed390123309cda7b0c6301c964ae232077af957..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 505 zcmaJ-O;5r=5XHn(kDfh@i5iIx?pC9uP?e8rFd(Ef+!t7CP1$X>TO$d7!(Zip@V7X{ zD4v{Tl6f=l&D+_}z1{Z;#SY~@H@?aDWAA?Q= zlOP=ThEGl?Gi@vm@;I4eyNGj)vE!DJZKeGFKPcwr0EJ`siYyBxziUaE<{X* zruYRINwM@mX(gReS{RYyy*A67TGXY(t<}k)@&yTUjC=q9 diff --git a/website/locale/ru/LC_MESSAGES/messages.po b/website/locale/ru/LC_MESSAGES/messages.po deleted file mode 100644 index ec6fbadc33b..00000000000 --- a/website/locale/ru/LC_MESSAGES/messages.po +++ /dev/null @@ -1,327 +0,0 @@ -# Translations template for PROJECT. -# Copyright (C) 2020 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# Automatically generated, 2020. -# -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-06-17 12:20+0300\n" -"PO-Revision-Date: 2020-06-17 12:20+0300\n" -"Last-Translator: Automatically generated\n" -"Language-Team: none\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" -"Language: ru\n" -"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n" -"%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2);\n" - -#: templates/common_meta.html:1 -msgid "" -"ClickHouse is a fast open-source column-oriented database management system " -"that allows generating analytical data reports in real-time using SQL queries" -msgstr "" - -#: templates/common_meta.html:6 -msgid "ClickHouse - fast open-source OLAP DBMS" -msgstr "" - -#: templates/common_meta.html:10 -msgid "ClickHouse DBMS" -msgstr "" - -#: templates/common_meta.html:32 -msgid "open-source" -msgstr "" - -#: templates/common_meta.html:32 -msgid "relational" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytics" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytical" -msgstr "" - -#: templates/common_meta.html:32 -msgid "Big Data" -msgstr "" - -#: templates/common_meta.html:32 -msgid "web-analytics" -msgstr "" - -#: templates/footer.html:8 -msgid "ClickHouse source code is published under the Apache 2.0 License." -msgstr "" - -#: templates/footer.html:8 -msgid "" -"Software is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR " -"CONDITIONS OF ANY KIND, either express or implied." -msgstr "" - -#: templates/footer.html:11 -msgid "Yandex LLC" -msgstr "" - -#: templates/blog/content.html:20 templates/blog/content.html:25 -#: templates/blog/content.html:30 -msgid "Share on" -msgstr "" - -#: templates/blog/content.html:37 -msgid "Published date" -msgstr "" - -#: templates/blog/nav.html:20 -msgid "New post" -msgstr "" - -#: templates/blog/nav.html:25 -msgid "Documentation" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "Rating" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "votes" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Article Rating" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Was this content helpful?" 
-msgstr "" - -#: templates/docs/footer.html:7 -msgid "Unusable" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Poor" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Good" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Excellent" -msgstr "" - -#: templates/docs/footer.html:8 -msgid "documentation" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "Built from" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "published on" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "modified on" -msgstr "" - -#: templates/docs/machine-translated.html:3 -msgid "Help wanted!" -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "" -"The following content of this documentation page has been machine-" -"translated. But unlike other websites, it is not done on the fly. This " -"translated text lives on GitHub repository alongside main ClickHouse " -"codebase and waits for fellow native speakers to make it more human-readable." -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "You can also use the original English version as a reference." 
-msgstr "" - -#: templates/docs/machine-translated.html:7 -msgid "Help ClickHouse documentation by editing this page" -msgstr "" - -#: templates/docs/sidebar.html:3 -msgid "Multi-page or single-page" -msgstr "" - -#: templates/docs/sidebar.html:5 -msgid "Multi-page version" -msgstr "" - -#: templates/docs/sidebar.html:8 -msgid "Single-page version" -msgstr "" - -#: templates/docs/sidebar.html:13 -msgid "Version" -msgstr "" - -#: templates/docs/sidebar.html:13 templates/docs/sidebar.html:19 -msgid "latest" -msgstr "" - -#: templates/docs/sidebar.html:36 -msgid "PDF version" -msgstr "" - -#: templates/docs/toc.html:8 -msgid "Table of Contents" -msgstr "" - -#: templates/index/community.html:4 -msgid "ClickHouse community" -msgstr "" - -#: templates/index/community.html:13 templates/index/community.html:14 -msgid "ClickHouse YouTube Channel" -msgstr "" - -#: templates/index/community.html:25 templates/index/community.html:26 -msgid "ClickHouse Official Twitter Account" -msgstr "" - -#: templates/index/community.html:36 templates/index/community.html:37 -msgid "ClickHouse at Telegram" -msgstr "" - -#: templates/index/community.html:41 -msgid "Chat with real users in " -msgstr "" - -#: templates/index/community.html:44 templates/index/community.html:116 -msgid "English" -msgstr "" - -#: templates/index/community.html:45 -msgid "or in" -msgstr "" - -#: templates/index/community.html:47 templates/index/community.html:117 -msgid "Russian" -msgstr "" - -#: templates/index/community.html:65 -msgid "Open GitHub issue to ask for help or to file a feature request" -msgstr "" - -#: templates/index/community.html:76 templates/index/community.html:77 -msgid "ClickHouse Slack Workspace" -msgstr "" - -#: templates/index/community.html:82 -msgid "Multipurpose public hangout" -msgstr "" - -#: templates/index/community.html:101 -msgid "Ask any questions" -msgstr "" - -#: templates/index/community.html:115 -msgid "ClickHouse Blog" -msgstr "" - -#: templates/index/community.html:116 
-msgid "in" -msgstr "" - -#: templates/index/community.html:128 templates/index/community.html:129 -msgid "ClickHouse at Google Groups" -msgstr "" - -#: templates/index/community.html:133 -msgid "Email discussions" -msgstr "" - -#: templates/index/community.html:142 -msgid "Like ClickHouse?" -msgstr "" - -#: templates/index/community.html:143 -msgid "Help to spread the word about it via" -msgstr "" - -#: templates/index/community.html:144 -msgid "and" -msgstr "" - -#: templates/index/community.html:153 -msgid "Hosting ClickHouse Meetups" -msgstr "" - -#: templates/index/community.html:157 -msgid "" -"ClickHouse meetups are essential for strengthening community worldwide, but " -"they couldn't be possible without the help of local organizers. Please, fill " -"this form if you want to become one or want to meet ClickHouse core team for " -"any other reason." -msgstr "" - -#: templates/index/community.html:159 -msgid "ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:165 -msgid "Name" -msgstr "" - -#: templates/index/community.html:168 -msgid "Email" -msgstr "" - -#: templates/index/community.html:171 -msgid "Company" -msgstr "" - -#: templates/index/community.html:174 -msgid "City" -msgstr "" - -#: templates/index/community.html:179 -msgid "We'd like to host a public ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:185 -msgid "We'd like to invite ClickHouse team to our office" -msgstr "" - -#: templates/index/community.html:191 -msgid "We'd like to invite ClickHouse team to another event we organize" -msgstr "" - -#: templates/index/community.html:197 -msgid "We're interested in commercial consulting, support or managed service" -msgstr "" - -#: templates/index/community.html:201 -msgid "Additional comments" -msgstr "" - -#: templates/index/community.html:203 -msgid "Send" -msgstr "" - -#: templates/index/community.html:212 -msgid "" -"If you have any more thoughts or questions, feel free to contact Yandex " -"ClickHouse team 
directly at" -msgstr "" - -#: templates/index/community.html:213 -msgid "turn on JavaScript to see email address" -msgstr "" diff --git a/website/locale/zh/LC_MESSAGES/messages.mo b/website/locale/zh/LC_MESSAGES/messages.mo deleted file mode 100644 index c5c8a3ba5015c9ef19e27ca98c27e3953ccd7ba9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 424 zcmaJ+%TB{E5G)cWA31aIfg5abf&`VTO7sy>P(!0M>fKGYX^?A2wu5N@gWuy@a6nK_ zthCb3?r3KAg&9Z2@Bozbr^17rOG!Usmm$j-m9jt3d9)EJeWY6hDtWnaiKkyE{PhSCh$^ZZW diff --git a/website/locale/zh/LC_MESSAGES/messages.po b/website/locale/zh/LC_MESSAGES/messages.po deleted file mode 100644 index 031b66c1a75..00000000000 --- a/website/locale/zh/LC_MESSAGES/messages.po +++ /dev/null @@ -1,325 +0,0 @@ -# Translations template for PROJECT. -# Copyright (C) 2020 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# Automatically generated, 2020. -# -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-06-17 12:20+0300\n" -"PO-Revision-Date: 2020-06-17 12:20+0300\n" -"Last-Translator: Automatically generated\n" -"Language-Team: none\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.8.0\n" -"Language: zh\n" - -#: templates/common_meta.html:1 -msgid "" -"ClickHouse is a fast open-source column-oriented database management system " -"that allows generating analytical data reports in real-time using SQL queries" -msgstr "" - -#: templates/common_meta.html:6 -msgid "ClickHouse - fast open-source OLAP DBMS" -msgstr "" - -#: templates/common_meta.html:10 -msgid "ClickHouse DBMS" -msgstr "" - -#: templates/common_meta.html:32 -msgid "open-source" -msgstr "" - -#: templates/common_meta.html:32 -msgid "relational" -msgstr "" - -#: templates/common_meta.html:32 -msgid "analytics" -msgstr "" - -#: templates/common_meta.html:32 -msgid 
"analytical" -msgstr "" - -#: templates/common_meta.html:32 -msgid "Big Data" -msgstr "" - -#: templates/common_meta.html:32 -msgid "web-analytics" -msgstr "" - -#: templates/footer.html:8 -msgid "ClickHouse source code is published under the Apache 2.0 License." -msgstr "" - -#: templates/footer.html:8 -msgid "" -"Software is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR " -"CONDITIONS OF ANY KIND, either express or implied." -msgstr "" - -#: templates/footer.html:11 -msgid "Yandex LLC" -msgstr "" - -#: templates/blog/content.html:20 templates/blog/content.html:25 -#: templates/blog/content.html:30 -msgid "Share on" -msgstr "" - -#: templates/blog/content.html:37 -msgid "Published date" -msgstr "" - -#: templates/blog/nav.html:20 -msgid "New post" -msgstr "" - -#: templates/blog/nav.html:25 -msgid "Documentation" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "Rating" -msgstr "" - -#: templates/docs/footer.html:3 -msgid "votes" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Article Rating" -msgstr "" - -#: templates/docs/footer.html:4 -msgid "Was this content helpful?" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Unusable" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Poor" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Good" -msgstr "" - -#: templates/docs/footer.html:7 -msgid "Excellent" -msgstr "" - -#: templates/docs/footer.html:8 -msgid "documentation" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "Built from" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "published on" -msgstr "" - -#: templates/docs/footer.html:15 -msgid "modified on" -msgstr "" - -#: templates/docs/machine-translated.html:3 -msgid "Help wanted!" -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "" -"The following content of this documentation page has been machine-" -"translated. But unlike other websites, it is not done on the fly. 
This " -"translated text lives on GitHub repository alongside main ClickHouse " -"codebase and waits for fellow native speakers to make it more human-readable." -msgstr "" - -#: templates/docs/machine-translated.html:4 -msgid "You can also use the original English version as a reference." -msgstr "" - -#: templates/docs/machine-translated.html:7 -msgid "Help ClickHouse documentation by editing this page" -msgstr "" - -#: templates/docs/sidebar.html:3 -msgid "Multi-page or single-page" -msgstr "" - -#: templates/docs/sidebar.html:5 -msgid "Multi-page version" -msgstr "" - -#: templates/docs/sidebar.html:8 -msgid "Single-page version" -msgstr "" - -#: templates/docs/sidebar.html:13 -msgid "Version" -msgstr "" - -#: templates/docs/sidebar.html:13 templates/docs/sidebar.html:19 -msgid "latest" -msgstr "" - -#: templates/docs/sidebar.html:36 -msgid "PDF version" -msgstr "" - -#: templates/docs/toc.html:8 -msgid "Table of Contents" -msgstr "" - -#: templates/index/community.html:4 -msgid "ClickHouse community" -msgstr "" - -#: templates/index/community.html:13 templates/index/community.html:14 -msgid "ClickHouse YouTube Channel" -msgstr "" - -#: templates/index/community.html:25 templates/index/community.html:26 -msgid "ClickHouse Official Twitter Account" -msgstr "" - -#: templates/index/community.html:36 templates/index/community.html:37 -msgid "ClickHouse at Telegram" -msgstr "" - -#: templates/index/community.html:41 -msgid "Chat with real users in " -msgstr "" - -#: templates/index/community.html:44 templates/index/community.html:116 -msgid "English" -msgstr "" - -#: templates/index/community.html:45 -msgid "or in" -msgstr "" - -#: templates/index/community.html:47 templates/index/community.html:117 -msgid "Russian" -msgstr "" - -#: templates/index/community.html:65 -msgid "Open GitHub issue to ask for help or to file a feature request" -msgstr "" - -#: templates/index/community.html:76 templates/index/community.html:77 -msgid "ClickHouse Slack Workspace" -msgstr "" 
- -#: templates/index/community.html:82 -msgid "Multipurpose public hangout" -msgstr "" - -#: templates/index/community.html:101 -msgid "Ask any questions" -msgstr "" - -#: templates/index/community.html:115 -msgid "ClickHouse Blog" -msgstr "" - -#: templates/index/community.html:116 -msgid "in" -msgstr "" - -#: templates/index/community.html:128 templates/index/community.html:129 -msgid "ClickHouse at Google Groups" -msgstr "" - -#: templates/index/community.html:133 -msgid "Email discussions" -msgstr "" - -#: templates/index/community.html:142 -msgid "Like ClickHouse?" -msgstr "" - -#: templates/index/community.html:143 -msgid "Help to spread the word about it via" -msgstr "" - -#: templates/index/community.html:144 -msgid "and" -msgstr "" - -#: templates/index/community.html:153 -msgid "Hosting ClickHouse Meetups" -msgstr "" - -#: templates/index/community.html:157 -msgid "" -"ClickHouse meetups are essential for strengthening community worldwide, but " -"they couldn't be possible without the help of local organizers. Please, fill " -"this form if you want to become one or want to meet ClickHouse core team for " -"any other reason." 
-msgstr "" - -#: templates/index/community.html:159 -msgid "ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:165 -msgid "Name" -msgstr "" - -#: templates/index/community.html:168 -msgid "Email" -msgstr "" - -#: templates/index/community.html:171 -msgid "Company" -msgstr "" - -#: templates/index/community.html:174 -msgid "City" -msgstr "" - -#: templates/index/community.html:179 -msgid "We'd like to host a public ClickHouse Meetup" -msgstr "" - -#: templates/index/community.html:185 -msgid "We'd like to invite ClickHouse team to our office" -msgstr "" - -#: templates/index/community.html:191 -msgid "We'd like to invite ClickHouse team to another event we organize" -msgstr "" - -#: templates/index/community.html:197 -msgid "We're interested in commercial consulting, support or managed service" -msgstr "" - -#: templates/index/community.html:201 -msgid "Additional comments" -msgstr "" - -#: templates/index/community.html:203 -msgid "Send" -msgstr "" - -#: templates/index/community.html:212 -msgid "" -"If you have any more thoughts or questions, feel free to contact Yandex " -"ClickHouse team directly at" -msgstr "" - -#: templates/index/community.html:213 -msgid "turn on JavaScript to see email address" -msgstr "" diff --git a/website/support/agreement/index.html b/website/support/agreement/index.html deleted file mode 100644 index 59e5ca9ab75..00000000000 --- a/website/support/agreement/index.html +++ /dev/null @@ -1,27 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} - -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% include "templates/support/agreement-hero.html" %} - -{% include "templates/support/agreement-content.html" %} - -{% include "templates/global/newsletter.html" %} -{% include "templates/global/github_stars.html" %} - -{% endblock 
%} diff --git a/website/support/case/index.html b/website/support/case/index.html deleted file mode 100644 index 07f77d65d46..00000000000 --- a/website/support/case/index.html +++ /dev/null @@ -1,27 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} - -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% include "templates/support/hero.html" %} -{% include "templates/support/overview.html" %} -{% include "templates/support/form.html" %} - -{% include "templates/global/newsletter.html" %} -{% include "templates/global/github_stars.html" %} - -{% endblock %} diff --git a/website/support/policy/index.html b/website/support/policy/index.html deleted file mode 100644 index babff39a747..00000000000 --- a/website/support/policy/index.html +++ /dev/null @@ -1,27 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} - -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% include "templates/support/policy-hero.html" %} - -{% include "templates/support/policy-content.html" %} - -{% include "templates/global/newsletter.html" %} -{% include "templates/global/github_stars.html" %} - -{% endblock %} diff --git a/website/thank-you/index.html b/website/thank-you/index.html deleted file mode 100644 index 2f1a172e277..00000000000 --- a/website/thank-you/index.html +++ /dev/null @@ -1,25 +0,0 @@ -{% set prefetch_items = [ - ('/docs/en/', 'document') -] %} - -{% extends "templates/base.html" %} - -{% block extra_meta %} -{% include "templates/common_fonts.html" %} -{% endblock %} - -{% block nav %} - -{% include "templates/global/nav.html" %} - -{% endblock %} - -{% block content %} - -{% 
include "templates/contact-thank-you/hero.html" %} -{% include "templates/contact-thank-you/overview.html" %} - -{% include "templates/global/newsletter.html" %} -{% include "templates/global/github_stars.html" %} - -{% endblock %} From d71780d34ca36200493b06e44ddac688263e8e68 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 02:18:38 +0200 Subject: [PATCH 567/615] Fix some tests --- ...insert_without_explicit_database.reference | 4 +- ...s_and_insert_without_explicit_database.sql | 4 +- .../00206_empty_array_to_single.reference | 2 +- tests/queries/0_stateless/00301_csv.reference | 2 +- .../0_stateless/00502_sum_map.reference | 2 +- .../00506_shard_global_in_union.reference | 16 +- ...aving_time_shift_backwards_at_midnight.sql | 2 +- .../00927_asof_join_other_types.reference | 12 +- .../01087_storage_generate.reference | 200 +++++++++--------- .../0_stateless/01087_storage_generate.sql | 5 +- .../01087_table_function_generate.reference | 84 ++++---- .../01087_table_function_generate.sql | 17 +- .../01186_conversion_to_nullable.reference | 8 +- .../01280_min_map_max_map.reference | 4 +- ..._date_time_best_effort_timestamp.reference | 2 +- .../01440_to_date_monotonicity.reference | 2 +- .../01676_reinterpret_as.reference | 4 +- .../01691_DateTime64_clamp.reference | 10 +- .../01692_DateTime64_from_DateTime.reference | 4 +- .../0_stateless/01699_timezoneOffset.sql | 17 +- ..._toDateTime_from_string_clamping.reference | 2 +- ...re_consistent_datetime64_parsing.reference | 6 +- .../01734_datetime64_from_float.reference | 6 +- .../01772_to_start_of_hour_align.sql | 4 +- .../01852_map_combinator.reference | 2 +- ...68_order_by_fill_with_datetime64.reference | 28 +-- .../01921_datatype_date32.reference | 8 +- .../02096_date_time_1970_saturation.reference | 14 +- .../02184_default_table_engine.reference | 4 +- .../02184_default_table_engine.sql | 4 +- 30 files changed, 238 insertions(+), 241 deletions(-) diff --git 
a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference index 2176028b1c0..788c54e3b42 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.reference @@ -1,5 +1,5 @@ -2014-01-02 0 0 1970-01-01 03:00:00 2014-01-02 03:04:06 -1 2014-01-02 07:04:06 +2014-01-02 0 0 1969-12-31 16:00:00 2014-01-02 03:04:06 +1 2014-01-01 19:04:06 0 0 0 diff --git a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql index f3130f24521..b05b49ba33a 100644 --- a/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql +++ b/tests/queries/0_stateless/00101_materialized_views_and_insert_without_explicit_database.sql @@ -8,8 +8,8 @@ DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_view; DROP TABLE IF EXISTS test_view_filtered; -CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Asia/Istanbul'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); -CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Asia/Istanbul')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('America/Los_Angeles'), UTCEventTime DateTime('UTC')) ENGINE = MergeTree(EventDate, CounterID, 8192); +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('America/Los_Angeles')) ENGINE = Memory AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, 
CounterID UInt32) ENGINE = Memory POPULATE AS SELECT CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); diff --git a/tests/queries/0_stateless/00206_empty_array_to_single.reference b/tests/queries/0_stateless/00206_empty_array_to_single.reference index 1f616bf2f86..248affbb606 100644 --- a/tests/queries/0_stateless/00206_empty_array_to_single.reference +++ b/tests/queries/0_stateless/00206_empty_array_to_single.reference @@ -1,7 +1,7 @@ [1,2] [0] [4,5,6] -[''] ['1970-01-01'] ['1970-01-01 03:00:00'] +[''] ['1970-01-01'] ['1970-01-01 02:00:00'] [0] [''] ['1970-01-01 00:00:00'] ['1970-01-01'] [0] ['0'] ['2015-01-01 00:00:00'] ['2015-01-01'] [0,1] [''] ['2015-01-01 00:00:00','2015-01-01 00:00:01'] ['2015-01-01','2015-01-02'] diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index a9351f91f70..9863da4b640 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -7,7 +7,7 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 1 2016-01-02 01:02:03 2 2017-08-15 13:15:01 3 -1970-01-02 06:46:39 4 +1970-01-02 05:46:39 4 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference index efd5a5534d4..31b067a2bc9 100644 --- a/tests/queries/0_stateless/00502_sum_map.reference +++ b/tests/queries/0_stateless/00502_sum_map.reference @@ -16,7 +16,7 @@ ([1],[1]) ([1],[1]) (['a'],[1]) -(['1970-01-01 03:00:01'],[1]) +(['1970-01-01 02:00:01'],[1]) (['1970-01-02'],[1]) (['01234567-89ab-cdef-0123-456789abcdef'],[1]) ([1.01],[1]) diff --git a/tests/queries/0_stateless/00506_shard_global_in_union.reference b/tests/queries/0_stateless/00506_shard_global_in_union.reference index 0ba9c36de26..c8fbaaf12a5 100644 --- 
a/tests/queries/0_stateless/00506_shard_global_in_union.reference +++ b/tests/queries/0_stateless/00506_shard_global_in_union.reference @@ -9,14 +9,14 @@ NOW BAD ==========================: 34 finish ===========================; * A UNION * B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 Event, Datetime A UNION * B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 * A UNION Event, Datetime B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 Event, Datetime A UNION Event, Datetime B: -A 1970-01-01 03:00:01 -B 1970-01-01 03:00:02 +A 1970-01-01 02:00:01 +B 1970-01-01 02:00:02 diff --git a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql index 1fdd9b20b2b..4244ce2039b 100644 --- a/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql +++ b/tests/queries/0_stateless/00802_daylight_saving_time_shift_backwards_at_midnight.sql @@ -1,3 +1,3 @@ -- concat with empty string to defeat injectiveness of toString assumption. 
-SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Asia/Istanbul') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; +SELECT concat('', toString(toDateTime('1981-09-29 00:00:00', 'Europe/Moscow') + INTERVAL number * 300 SECOND)) AS k FROM numbers(10000) GROUP BY k HAVING count() > 1 ORDER BY k; SELECT concat('', toString(toDateTime('2018-09-19 00:00:00', 'Asia/Tehran') + INTERVAL number * 300 SECOND)) AS k FROM numbers(1000) GROUP BY k HAVING count() > 1 ORDER BY k; diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.reference b/tests/queries/0_stateless/00927_asof_join_other_types.reference index 83ee534ff91..80c85ec1ae3 100644 --- a/tests/queries/0_stateless/00927_asof_join_other_types.reference +++ b/tests/queries/0_stateless/00927_asof_join_other_types.reference @@ -10,9 +10,9 @@ 2 1 1 0 2 3 3 3 2 5 5 3 -2 1970-01-01 03:00:01 1 0 -2 1970-01-01 03:00:03 3 3 -2 1970-01-01 03:00:05 5 3 +2 1970-01-01 02:00:01 1 0 +2 1970-01-01 02:00:03 3 3 +2 1970-01-01 02:00:05 5 3 2 1 1 0 2 3 3 3 2 5 5 3 @@ -22,6 +22,6 @@ 2 1 1 0 2 3 3 3 2 5 5 3 -2 1970-01-01 03:00:00.001 1 0 -2 1970-01-01 03:00:00.003 3 3 -2 1970-01-01 03:00:00.005 5 3 +2 1970-01-01 02:00:00.001 1 0 +2 1970-01-01 02:00:00.003 3 3 +2 1970-01-01 02:00:00.005 5 3 diff --git a/tests/queries/0_stateless/01087_storage_generate.reference b/tests/queries/0_stateless/01087_storage_generate.reference index 78c6784f7d2..00ea8ac914f 100644 --- a/tests/queries/0_stateless/01087_storage_generate.reference +++ b/tests/queries/0_stateless/01087_storage_generate.reference @@ -1,103 +1,103 @@ 100 - -[] -54259.6828 ('2088-03-01 16:26:24.094','d3c2a216-a98c-d56c-7bf7-62de9f264cf4') -[88] 34528.4014 ('2031-12-09 00:40:39.898','9ef777c8-de0e-d25e-e16c-5b624f88523c') -[-1] 121968.7945 ('2060-02-05 09:18:12.011','7655e515-d2ca-2f06-0950-e4f44f69aca7') -[-103,75] -135033.4349 ('2038-12-19 20:38:58.695','86b57d15-292d-2517-9acf-47cd053e7a3a') -[110] -202668.69 
('2009-06-18 01:53:29.808','bc630f78-7d58-0c46-dd4b-27fc35625e96') -[-22,2] 168636.9728 ('2074-09-03 09:20:20.936','7624ce27-9bff-4e9d-3f18-6851a97dd0ca') -[-22,-62] -75192.4989 ('2085-10-11 21:51:12.855','a4c4d0ed-f448-244e-1723-ca1bba816f2b') -[-2,-90] 133592.5064 ('2010-10-28 21:18:04.633','8ba9103b-f90c-b49b-38c1-223ae5f42bf7') -[-94,80] 197330.6359 ('2024-03-30 22:08:45.772','83442013-3677-5097-065d-72dfbe8a3506') -[23] 167557.6237 ('2078-07-25 21:54:42.480','be14d98e-5b24-54ee-c959-d24fa9a58fdd') -[46,-10,-63] 185107.1979 ('2040-10-07 06:06:53.504','5ed1fe6a-9313-41d7-4bf9-3948e961509f') -[-107,68] -163781.3045 ('2021-12-21 19:18:58.933','7b634f19-0863-829e-484b-be288aab54a1') -[-35,-116,73] -203577.5379 ('2093-08-01 20:21:09.407','d371bad4-b098-ffdd-f84c-6a02390c2939') -[61] 152284.9386 ('2089-12-20 19:21:33.149','9e8426c1-278a-4d9c-4076-364a95b065e3') -[75] 170968.4171 ('2020-07-17 15:45:31.975','47397a81-bda7-8bd9-59f7-d60e2204fe99') -[-115,93] -173740.5652 ('2098-04-25 22:10:33.327','117e31dd-102e-ee6c-0dbd-0a4203c18ca5') -[-20,4,21] 63834.8685 ('2000-07-08 18:09:40.271','10b0fa48-55a3-755a-4a44-36315ae04c1c') -[-110,117,91] -160640.1506 ('1998-04-18 10:58:04.479','6dfa3a8e-6e65-543c-5f50-1ff45835aa5a') -[62] 63817.7977 ('2043-01-24 02:07:18.972','98b8ef31-4f65-2f8b-1ea7-b1473900099e') -[-2] -175477.0173 ('2007-01-16 07:46:14.781','ec92f616-6e1f-003a-54c6-c5f9118d2f1b') -[] 197663.3035 ('2046-06-30 17:04:56.788','fb3244a4-8af2-104f-2a6f-25a7b7b9a112') -[-24] -174299.4691 ('2058-02-23 14:50:58.839','d63ee868-fa93-bf8b-0264-8ebbceb13e3b') -[95,38] -65083.7371 ('2015-03-10 13:33:16.429','47bd199c-f99e-51ea-84e9-b65cce9d167c') -[91,110,72] 130908.9643 ('2036-03-16 15:17:53.679','0dd4ca31-1e09-d7e0-f3df-60cad3cfa805') -[] 208972.3779 ('2034-03-05 22:29:21.994','1069d77c-dfd2-912e-60b8-3c5b964f7e11') -[-32] 167938.505 ('2093-09-10 20:39:39.050','9d1025b6-2d0c-1d84-dafd-02668eb29270') -[] 153744.6987 ('2088-10-02 
11:02:11.024','a88e6cb7-2210-5ce5-6bcf-24afc0eca5b6') -[67] -74220.665 ('2074-12-30 18:43:40.817','68096065-18c8-8aca-fd21-15330ead669d') -[6] 66759.8938 ('2091-09-01 19:07:18.219','bb14f4cc-0b54-9a8c-e835-71333b28c03b') -[-28,-82,9] 168625.3131 ('2002-03-20 21:02:30.321','405bb877-6e28-8b91-cb62-bd82a3fa797c') -[] -19760.167 ('2044-11-08 07:52:03.325','13769348-9e58-0e75-3972-8bbadc150715') -[] 160663.7797 ('2025-04-12 13:17:53.501','e6370321-94f5-97e6-0348-a84e72ff5b42') -[-17,18] 99105.9856 ('1972-05-01 12:23:11.688','02618b9e-97cd-4698-d2e8-3f52f4c5a09a') -[86,77] -116990.3914 ('1981-12-31 05:06:54.198','3ac42bb4-8652-b1a8-10bb-98f0337261f8') -[-109,69,-63] -151527.3587 ('2001-01-17 11:19:56.504','77fe7ee2-f279-2855-bfd2-a7d7cee678cc') -[] -57762.3928 ('1978-08-16 18:47:37.660','ab9a110a-fd8d-3c4c-5a49-34c2005536ce') -[-77] 107274.6407 ('2017-01-12 12:03:02.657','c1ad4f17-cc54-45f3-9410-9c1011653f6d') -[] 107133.641 ('2050-10-05 06:29:27.154','36e576aa-c77f-994e-1925-4a4c40da3a0f') -[] 46672.2176 ('2094-01-21 20:25:39.144','e9ba850d-604e-bc7d-417c-1078e89d4615') -[-87,-122,-65] -86258.4663 ('2081-06-17 03:37:45.498','64795221-9719-7937-b4d2-be5f30065ece') -[-53] -48672.1424 ('1992-06-27 17:27:23.602','7c67bc31-c7bb-6197-fdca-f73329b976f2') -[34] -108954.782 ('2096-07-03 23:06:30.632','9c1b37d7-4ced-9428-a0ae-34c5436b14c4') -[] -168124.2364 ('1987-06-03 06:47:12.945','d1c39af4-f920-5095-b8e2-0f878950167b') -[] -112431.4799 ('2021-07-26 07:04:58.527','da07a72d-7e1f-8890-4c4b-326835d11b39') -[-35,-95,58] -181254.9139 ('2086-11-12 17:17:14.473','22f74d0b-dfc0-3f7a-33f4-8055d8fa7846') -[98,119] 11468.5238 ('2092-02-25 11:07:07.695','a1fb97bf-1885-6715-c233-b88a6cd111e4') -[] 82333.8963 ('1989-11-23 01:38:57.012','a2b82b5b-8331-555c-579b-de4b0eeb7e81') -[-5,-66,69] 32055.8376 ('2040-12-17 16:49:08.704','4537d25e-a2db-ea9a-8e24-a16ed7e0c6e4') -[81,-84,-24] -210815.2512 ('2047-06-09 13:30:06.922','ac3c5b5f-f977-2830-c398-d10a6076a498') -[84,-105] -175413.7733 
('1998-11-03 04:30:21.191','c535feac-1943-c0a1-23f0-645d5406db24') -[58,31] -335.8512 ('1973-07-09 12:21:10.444','24a7dd3d-2565-1de3-05d9-e45fd8ba7729') -[-49,-47] 177399.2836 ('2049-03-15 15:33:00.190','e4432b9b-61e9-d451-dc87-ae3b9da6fd35') -[] 211525.2349 ('2106-01-11 10:44:18.918','23315435-7132-05b5-5a9b-c2c738433a87') -[45,-95,-39] -15314.9732 ('2055-10-29 13:51:12.182','833b2efa-8c72-f5f6-3040-cb4831e8ceb9') -[] 213384.5774 ('2067-02-10 22:02:42.113','0cd7f438-caa7-0d21-867c-1fdb6d67d797') -[99] -147316.5599 ('2000-05-09 21:37:34.776','a3ea6796-38d5-72ff-910d-8b4300831916') -[] 8828.2471 ('1993-11-30 16:53:22.503','7209213f-38bb-cfed-1955-f1fad5a9577a') -[117,9,-35] -134812.6269 ('2065-09-04 23:47:26.589','d33d0d6f-b9c0-2850-4593-cfc9f1e20a4d') -[-35,-58,-101] -9101.5369 ('2023-08-24 20:56:11.695','87fbe3f9-b1f0-c030-a4c0-8662045923b4') -[-58,87] 122510.9099 ('2019-08-09 17:40:29.849','c1d3a2cc-878f-c2c3-4a0b-10e98cda8b4a') -[4,19,58] -13496.8672 ('2027-05-01 09:11:48.659','8996ae31-d670-cbfe-b735-b16b7c3b3476') -[23,-75,-89] -51218.286 ('2010-06-02 02:49:03.396','d32b8b61-cc3e-31fa-2a2a-abefa60bfcee') -[50] -45297.4315 ('2087-04-15 06:46:08.247','04fe9603-97fc-07a4-6248-0f21e408c884') -[-23,17,63] 89185.9462 ('2065-10-26 08:27:12.817','a5fbf764-70b4-8b65-4a8f-7550abca3859') -[-6] -129925.369 ('2013-11-05 07:44:45.233','11db26b3-e2b5-b9fa-6b0e-79c43a2e67ab') -[-72,-108] 203171.5475 ('2000-01-28 09:34:58.032','14d5399e-7949-20c7-0e47-85e2fce5836c') -[-73,34,-27] 2676.7265 ('2057-10-25 14:37:10.049','00049a92-4350-badb-3764-dd7f019b9b31') -[65,-7] -153472.9461 ('1973-04-12 02:34:41.245','e0a0324d-1552-d11e-f3a5-fbd822d206c5') -[] 81837.7838 ('2041-09-20 20:56:39.712','f7923f2c-e526-1706-79b9-58045d9deaa7') -[-113,8] 173192.6905 ('2066-04-02 09:59:59.356','e3013e5c-92e3-c03c-b57a-e1939e00a1a7') -[107] 9694.1102 ('1984-11-02 13:11:34.034','e973db18-07b7-2117-f3ba-e7002adfa939') -[] -76460.9664 ('2051-02-10 09:54:42.143','b8344c22-9e8a-7052-c644-9c3e5989cdf1') 
-[59,59,0] 27041.7606 ('2083-02-17 18:21:22.547','4d6b137b-a3e1-f36d-2c0c-c8d718dda388') -[-114] 133673.963 ('2005-10-02 20:34:27.452','04785b75-30e5-af8b-547e-d15bcb7f49fb') -[43] -169861.2 ('2006-12-13 09:26:13.923','cb865d38-d961-d7f9-acbb-583b9f31252f') -[] 197115.2174 ('2060-04-08 04:17:00.488','0f26c4b4-b24c-1fd5-c619-31bcf71a4831') -[-25] -200081.9506 ('2055-12-25 02:30:16.276','0b32ad69-2c84-4269-9718-e3171482878a') -[14,110] -40196.4463 ('2084-08-13 19:37:07.588','ed882071-acba-b3ab-5d77-d79a9544a834') -[-62,-71,-82] -154958.9747 ('2100-07-08 02:32:53.741','7711c7c1-0d22-e302-fc86-61ef5e68db96') -[96,-114,-101] 78910.332 ('2100-07-19 15:02:27.109','756bfd26-c4b3-94b8-e991-c7ab7a833b76') -[49] 80117.2267 ('1970-07-04 03:50:56.748','aebac019-9054-4a77-2ccd-8801fc4a7496') -[] 102078.4801 ('2055-01-07 01:22:33.624','21f2e59a-a1ca-5df3-27fd-aa95456cfbe5') -[-106] -108728.4237 ('2020-05-27 11:56:18.121','6b7b6674-9342-2360-4cc0-f7ef8a2404de') -[] 173213.5631 ('2034-01-18 19:04:16.059','2dc0038d-67c1-f0ee-280b-f3f0f536b01a') -[42] 139872.2503 ('2001-07-16 11:09:28.754','d6487da6-1077-1053-f314-9a1079f5df15') -[] 1107.5244 ('2031-02-26 15:06:00.846','b32bee8f-85b7-3c71-bb24-9a0093e6a08c') -[] 85892.8913 ('2088-04-13 14:54:18.514','84f3b59b-8d23-78a6-3032-91392344584f') -[43] -109644.2714 ('1974-07-04 14:45:43.139','cf722ca8-15f5-6fe2-997c-0cf88e95e902') -[] 212557.3762 ('2069-03-03 07:21:08.439','9e676cac-36e6-2962-f7b1-578214f0dfbd') -[-128,55] 80471.0777 ('1970-04-01 18:54:40.257','ca358854-416b-9c95-0b9b-c7fed7bb7cb5') -[-30,-54] -132205.4512 ('2017-12-15 22:54:15.750','3558faa4-2d2f-c533-437f-1e03d3600f1d') -[-116,-72] -91499.667 ('2105-09-23 21:06:17.755','07bb6e47-3234-c268-40d7-332388dc06f8') -[] -201636.5228 ('2085-01-27 07:54:42.717','86c3bdc3-ff0f-1723-07c2-845aa3c02370') -[-103,-39] 44330.7722 ('2064-07-02 11:08:28.068','0869c79d-6bdd-5d2d-a3d1-ffe13f6aa810') -[99] -31035.5391 ('2093-07-26 01:50:23.026','aeb59338-254f-dc09-fbd7-263da415e211') -[101] 
157961.4729 ('2036-05-04 02:35:07.845','8b6221a9-8dad-4655-7460-6b3031b06893') -[111] 84732.4403 ('1997-04-06 16:10:18.624','08806a79-59f4-c833-eedc-a200bb851767') -[9,-48] -190491.559 ('2031-11-03 19:47:03.757','914e6166-c96e-e0e4-101a-0bb516cf5a2f') -[-41] -132501.8311 ('2089-11-21 21:38:28.848','6de6cc8d-3c49-641e-fb12-87ed5ecb97b0') -[77] 64903.6579 ('1985-04-17 17:08:03.998','26484b8a-f3f1-587f-7777-bc7a57a689c3') +[] -54259.6828 ('2088-03-01 13:26:24.094','d3c2a216-a98c-d56c-7bf7-62de9f264cf4') +[88] 34528.4014 ('2031-12-08 21:40:39.898','9ef777c8-de0e-d25e-e16c-5b624f88523c') +[-1] 121968.7945 ('2060-02-05 06:18:12.011','7655e515-d2ca-2f06-0950-e4f44f69aca7') +[-103,75] -135033.4349 ('2038-12-19 17:38:58.695','86b57d15-292d-2517-9acf-47cd053e7a3a') +[110] -202668.69 ('2009-06-17 21:53:29.808','bc630f78-7d58-0c46-dd4b-27fc35625e96') +[-22,2] 168636.9728 ('2074-09-03 06:20:20.936','7624ce27-9bff-4e9d-3f18-6851a97dd0ca') +[-22,-62] -75192.4989 ('2085-10-11 18:51:12.855','a4c4d0ed-f448-244e-1723-ca1bba816f2b') +[-2,-90] 133592.5064 ('2010-10-28 17:18:04.633','8ba9103b-f90c-b49b-38c1-223ae5f42bf7') +[-94,80] 197330.6359 ('2024-03-30 19:08:45.772','83442013-3677-5097-065d-72dfbe8a3506') +[23] 167557.6237 ('2078-07-25 18:54:42.480','be14d98e-5b24-54ee-c959-d24fa9a58fdd') +[46,-10,-63] 185107.1979 ('2040-10-07 03:06:53.504','5ed1fe6a-9313-41d7-4bf9-3948e961509f') +[-107,68] -163781.3045 ('2021-12-21 16:18:58.933','7b634f19-0863-829e-484b-be288aab54a1') +[-35,-116,73] -203577.5379 ('2093-08-01 17:21:09.407','d371bad4-b098-ffdd-f84c-6a02390c2939') +[61] 152284.9386 ('2089-12-20 16:21:33.149','9e8426c1-278a-4d9c-4076-364a95b065e3') +[75] 170968.4171 ('2020-07-17 12:45:31.975','47397a81-bda7-8bd9-59f7-d60e2204fe99') +[-115,93] -173740.5652 ('2098-04-25 19:10:33.327','117e31dd-102e-ee6c-0dbd-0a4203c18ca5') +[-20,4,21] 63834.8685 ('2000-07-08 14:09:40.271','10b0fa48-55a3-755a-4a44-36315ae04c1c') +[-110,117,91] -160640.1506 ('1998-04-18 
06:58:04.479','6dfa3a8e-6e65-543c-5f50-1ff45835aa5a') +[62] 63817.7977 ('2043-01-23 23:07:18.972','98b8ef31-4f65-2f8b-1ea7-b1473900099e') +[-2] -175477.0173 ('2007-01-16 04:46:14.781','ec92f616-6e1f-003a-54c6-c5f9118d2f1b') +[] 197663.3035 ('2046-06-30 14:04:56.788','fb3244a4-8af2-104f-2a6f-25a7b7b9a112') +[-24] -174299.4691 ('2058-02-23 11:50:58.839','d63ee868-fa93-bf8b-0264-8ebbceb13e3b') +[95,38] -65083.7371 ('2015-03-10 10:33:16.429','47bd199c-f99e-51ea-84e9-b65cce9d167c') +[91,110,72] 130908.9643 ('2036-03-16 12:17:53.679','0dd4ca31-1e09-d7e0-f3df-60cad3cfa805') +[] 208972.3779 ('2034-03-05 19:29:21.994','1069d77c-dfd2-912e-60b8-3c5b964f7e11') +[-32] 167938.505 ('2093-09-10 17:39:39.050','9d1025b6-2d0c-1d84-dafd-02668eb29270') +[] 153744.6987 ('2088-10-02 08:02:11.024','a88e6cb7-2210-5ce5-6bcf-24afc0eca5b6') +[67] -74220.665 ('2074-12-30 15:43:40.817','68096065-18c8-8aca-fd21-15330ead669d') +[6] 66759.8938 ('2091-09-01 16:07:18.219','bb14f4cc-0b54-9a8c-e835-71333b28c03b') +[-28,-82,9] 168625.3131 ('2002-03-20 18:02:30.321','405bb877-6e28-8b91-cb62-bd82a3fa797c') +[] -19760.167 ('2044-11-08 04:52:03.325','13769348-9e58-0e75-3972-8bbadc150715') +[] 160663.7797 ('2025-04-12 10:17:53.501','e6370321-94f5-97e6-0348-a84e72ff5b42') +[-17,18] 99105.9856 ('1972-05-01 09:23:11.688','02618b9e-97cd-4698-d2e8-3f52f4c5a09a') +[86,77] -116990.3914 ('1981-12-31 02:06:54.198','3ac42bb4-8652-b1a8-10bb-98f0337261f8') +[-109,69,-63] -151527.3587 ('2001-01-17 08:19:56.504','77fe7ee2-f279-2855-bfd2-a7d7cee678cc') +[] -57762.3928 ('1978-08-16 15:47:37.660','ab9a110a-fd8d-3c4c-5a49-34c2005536ce') +[-77] 107274.6407 ('2017-01-12 09:03:02.657','c1ad4f17-cc54-45f3-9410-9c1011653f6d') +[] 107133.641 ('2050-10-05 03:29:27.154','36e576aa-c77f-994e-1925-4a4c40da3a0f') +[] 46672.2176 ('2094-01-21 17:25:39.144','e9ba850d-604e-bc7d-417c-1078e89d4615') +[-87,-122,-65] -86258.4663 ('2081-06-17 00:37:45.498','64795221-9719-7937-b4d2-be5f30065ece') +[-53] -48672.1424 ('1992-06-27 
13:27:23.602','7c67bc31-c7bb-6197-fdca-f73329b976f2') +[34] -108954.782 ('2096-07-03 20:06:30.632','9c1b37d7-4ced-9428-a0ae-34c5436b14c4') +[] -168124.2364 ('1987-06-03 02:47:12.945','d1c39af4-f920-5095-b8e2-0f878950167b') +[] -112431.4799 ('2021-07-26 04:04:58.527','da07a72d-7e1f-8890-4c4b-326835d11b39') +[-35,-95,58] -181254.9139 ('2086-11-12 14:17:14.473','22f74d0b-dfc0-3f7a-33f4-8055d8fa7846') +[98,119] 11468.5238 ('2092-02-25 08:07:07.695','a1fb97bf-1885-6715-c233-b88a6cd111e4') +[] 82333.8963 ('1989-11-22 22:38:57.012','a2b82b5b-8331-555c-579b-de4b0eeb7e81') +[-5,-66,69] 32055.8376 ('2040-12-17 13:49:08.704','4537d25e-a2db-ea9a-8e24-a16ed7e0c6e4') +[81,-84,-24] -210815.2512 ('2047-06-09 10:30:06.922','ac3c5b5f-f977-2830-c398-d10a6076a498') +[84,-105] -175413.7733 ('1998-11-03 01:30:21.191','c535feac-1943-c0a1-23f0-645d5406db24') +[58,31] -335.8512 ('1973-07-09 09:21:10.444','24a7dd3d-2565-1de3-05d9-e45fd8ba7729') +[-49,-47] 177399.2836 ('2049-03-15 12:33:00.190','e4432b9b-61e9-d451-dc87-ae3b9da6fd35') +[] 211525.2349 ('2106-01-11 07:44:18.918','23315435-7132-05b5-5a9b-c2c738433a87') +[45,-95,-39] -15314.9732 ('2055-10-29 10:51:12.182','833b2efa-8c72-f5f6-3040-cb4831e8ceb9') +[] 213384.5774 ('2067-02-10 19:02:42.113','0cd7f438-caa7-0d21-867c-1fdb6d67d797') +[99] -147316.5599 ('2000-05-09 17:37:34.776','a3ea6796-38d5-72ff-910d-8b4300831916') +[] 8828.2471 ('1993-11-30 13:53:22.503','7209213f-38bb-cfed-1955-f1fad5a9577a') +[117,9,-35] -134812.6269 ('2065-09-04 20:47:26.589','d33d0d6f-b9c0-2850-4593-cfc9f1e20a4d') +[-35,-58,-101] -9101.5369 ('2023-08-24 17:56:11.695','87fbe3f9-b1f0-c030-a4c0-8662045923b4') +[-58,87] 122510.9099 ('2019-08-09 14:40:29.849','c1d3a2cc-878f-c2c3-4a0b-10e98cda8b4a') +[4,19,58] -13496.8672 ('2027-05-01 06:11:48.659','8996ae31-d670-cbfe-b735-b16b7c3b3476') +[23,-75,-89] -51218.286 ('2010-06-01 22:49:03.396','d32b8b61-cc3e-31fa-2a2a-abefa60bfcee') +[50] -45297.4315 ('2087-04-15 03:46:08.247','04fe9603-97fc-07a4-6248-0f21e408c884') 
+[-23,17,63] 89185.9462 ('2065-10-26 05:27:12.817','a5fbf764-70b4-8b65-4a8f-7550abca3859') +[-6] -129925.369 ('2013-11-05 03:44:45.233','11db26b3-e2b5-b9fa-6b0e-79c43a2e67ab') +[-72,-108] 203171.5475 ('2000-01-28 06:34:58.032','14d5399e-7949-20c7-0e47-85e2fce5836c') +[-73,34,-27] 2676.7265 ('2057-10-25 11:37:10.049','00049a92-4350-badb-3764-dd7f019b9b31') +[65,-7] -153472.9461 ('1973-04-11 23:34:41.245','e0a0324d-1552-d11e-f3a5-fbd822d206c5') +[] 81837.7838 ('2041-09-20 17:56:39.712','f7923f2c-e526-1706-79b9-58045d9deaa7') +[-113,8] 173192.6905 ('2066-04-02 06:59:59.356','e3013e5c-92e3-c03c-b57a-e1939e00a1a7') +[107] 9694.1102 ('1984-11-02 10:11:34.034','e973db18-07b7-2117-f3ba-e7002adfa939') +[] -76460.9664 ('2051-02-10 06:54:42.143','b8344c22-9e8a-7052-c644-9c3e5989cdf1') +[59,59,0] 27041.7606 ('2083-02-17 15:21:22.547','4d6b137b-a3e1-f36d-2c0c-c8d718dda388') +[-114] 133673.963 ('2005-10-02 16:34:27.452','04785b75-30e5-af8b-547e-d15bcb7f49fb') +[43] -169861.2 ('2006-12-13 06:26:13.923','cb865d38-d961-d7f9-acbb-583b9f31252f') +[] 197115.2174 ('2060-04-08 01:17:00.488','0f26c4b4-b24c-1fd5-c619-31bcf71a4831') +[-25] -200081.9506 ('2055-12-24 23:30:16.276','0b32ad69-2c84-4269-9718-e3171482878a') +[14,110] -40196.4463 ('2084-08-13 16:37:07.588','ed882071-acba-b3ab-5d77-d79a9544a834') +[-62,-71,-82] -154958.9747 ('2100-07-07 23:32:53.741','7711c7c1-0d22-e302-fc86-61ef5e68db96') +[96,-114,-101] 78910.332 ('2100-07-19 12:02:27.109','756bfd26-c4b3-94b8-e991-c7ab7a833b76') +[49] 80117.2267 ('1970-07-04 00:50:56.748','aebac019-9054-4a77-2ccd-8801fc4a7496') +[] 102078.4801 ('2055-01-06 22:22:33.624','21f2e59a-a1ca-5df3-27fd-aa95456cfbe5') +[-106] -108728.4237 ('2020-05-27 08:56:18.121','6b7b6674-9342-2360-4cc0-f7ef8a2404de') +[] 173213.5631 ('2034-01-18 16:04:16.059','2dc0038d-67c1-f0ee-280b-f3f0f536b01a') +[42] 139872.2503 ('2001-07-16 07:09:28.754','d6487da6-1077-1053-f314-9a1079f5df15') +[] 1107.5244 ('2031-02-26 12:06:00.846','b32bee8f-85b7-3c71-bb24-9a0093e6a08c') +[] 
85892.8913 ('2088-04-13 11:54:18.514','84f3b59b-8d23-78a6-3032-91392344584f') +[43] -109644.2714 ('1974-07-04 11:45:43.139','cf722ca8-15f5-6fe2-997c-0cf88e95e902') +[] 212557.3762 ('2069-03-03 04:21:08.439','9e676cac-36e6-2962-f7b1-578214f0dfbd') +[-128,55] 80471.0777 ('1970-04-01 15:54:40.257','ca358854-416b-9c95-0b9b-c7fed7bb7cb5') +[-30,-54] -132205.4512 ('2017-12-15 19:54:15.750','3558faa4-2d2f-c533-437f-1e03d3600f1d') +[-116,-72] -91499.667 ('2105-09-23 18:06:17.755','07bb6e47-3234-c268-40d7-332388dc06f8') +[] -201636.5228 ('2085-01-27 04:54:42.717','86c3bdc3-ff0f-1723-07c2-845aa3c02370') +[-103,-39] 44330.7722 ('2064-07-02 08:08:28.068','0869c79d-6bdd-5d2d-a3d1-ffe13f6aa810') +[99] -31035.5391 ('2093-07-25 22:50:23.026','aeb59338-254f-dc09-fbd7-263da415e211') +[101] 157961.4729 ('2036-05-03 23:35:07.845','8b6221a9-8dad-4655-7460-6b3031b06893') +[111] 84732.4403 ('1997-04-06 12:10:18.624','08806a79-59f4-c833-eedc-a200bb851767') +[9,-48] -190491.559 ('2031-11-03 16:47:03.757','914e6166-c96e-e0e4-101a-0bb516cf5a2f') +[-41] -132501.8311 ('2089-11-21 18:38:28.848','6de6cc8d-3c49-641e-fb12-87ed5ecb97b0') +[77] 64903.6579 ('1985-04-17 13:08:03.998','26484b8a-f3f1-587f-7777-bc7a57a689c3') - diff --git a/tests/queries/0_stateless/01087_storage_generate.sql b/tests/queries/0_stateless/01087_storage_generate.sql index 7df9f3931d0..a9320791816 100644 --- a/tests/queries/0_stateless/01087_storage_generate.sql +++ b/tests/queries/0_stateless/01087_storage_generate.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS test_table; -CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE=GenerateRandom(); +CREATE TABLE test_table(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) ENGINE = GenerateRandom(); SELECT COUNT(*) FROM (SELECT * FROM test_table LIMIT 100); DROP TABLE IF EXISTS test_table; @@ -7,11 +7,10 @@ DROP TABLE IF EXISTS test_table; SELECT '-'; DROP TABLE IF EXISTS test_table_2; -CREATE TABLE test_table_2(a Array(Int8), d 
Decimal32(4), c Tuple(DateTime64(3, 'Asia/Istanbul'), UUID)) ENGINE=GenerateRandom(10, 5, 3); +CREATE TABLE test_table_2(a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3, 'UTC'), UUID)) ENGINE = GenerateRandom(10, 5, 3); SELECT * FROM test_table_2 LIMIT 100; SELECT '-'; DROP TABLE IF EXISTS test_table_2; - diff --git a/tests/queries/0_stateless/01087_table_function_generate.reference b/tests/queries/0_stateless/01087_table_function_generate.reference index ef7eac41ca2..d62ff5618fc 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/tests/queries/0_stateless/01087_table_function_generate.reference @@ -46,29 +46,29 @@ h \N o - -Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') -2113-06-12 2050-12-17 02:46:35 2096-02-16 22:18:22 -2141-08-09 2013-10-17 23:35:26 1976-01-24 12:52:48 -2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50 -1997-04-11 1972-09-18 23:44:08 2040-07-10 14:46:42 -2103-11-03 2044-11-23 20:57:12 1970-10-09 02:30:14 -2066-11-19 2029-12-10 03:13:55 2106-01-30 21:52:44 -2064-08-14 2016-07-14 11:33:45 2096-12-12 00:40:50 -2046-09-13 2085-07-10 18:51:14 2096-01-15 16:31:33 -2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39 -2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22 +Date DateTime(\'UTC\') DateTime(\'UTC\') +2113-06-12 2050-12-16 23:46:35 2096-02-16 19:18:22 +2141-08-09 2013-10-17 19:35:26 1976-01-24 09:52:48 +2039-08-16 1974-11-17 20:22:46 1980-03-04 18:02:50 +1997-04-11 1972-09-18 20:44:08 2040-07-10 11:46:42 +2103-11-03 2044-11-23 17:57:12 1970-10-08 23:30:14 +2066-11-19 2029-12-10 00:13:55 2106-01-30 18:52:44 +2064-08-14 2016-07-14 08:33:45 2096-12-11 21:40:50 +2046-09-13 2085-07-10 15:51:14 2096-01-15 13:31:33 +2008-03-16 2047-05-16 20:28:36 2103-02-11 13:44:39 +2000-07-07 2105-07-19 16:29:06 1980-01-02 02:18:22 - -DateTime64(3, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') DateTime64(6, \'Asia/Istanbul\') -1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215 
-1978-08-25 17:07:25.427 2034-05-02 20:49:42.148578 2015-08-26 15:26:31.783160 -2037-04-04 10:50:56.898 2055-05-28 11:12:48.819271 2068-12-26 09:58:49.635722 -2041-09-02 07:07:24.891 2051-08-01 14:15:40.218654 2081-10-19 15:55:40.057084 -1976-07-15 23:59:41.974 2075-01-29 20:34:10.425321 1996-12-31 10:51:28.562331 -1974-11-03 08:09:51.992 2010-04-19 04:09:03.451487 1994-05-15 15:42:53.162162 -2061-10-11 20:14:02.729 1981-07-22 10:13:45.729103 2084-05-27 08:59:37.746021 -1989-12-13 02:01:16.532 1992-10-05 07:07:57.973222 2037-10-24 18:53:50.985504 -1992-12-28 12:26:04.030 1971-07-29 09:20:38.230976 1980-03-26 18:49:55.428516 -2051-12-11 10:09:13.162 1982-01-12 03:25:45.754492 2010-05-17 11:01:28.452864 +DateTime64(3, \'UTC\') DateTime64(6, \'UTC\') DateTime64(6, \'UTC\') +1978-06-07 20:50:57.320 2013-08-28 06:21:54.010758 1991-08-25 13:23:26.140215 +1978-08-25 14:07:25.427 2034-05-02 17:49:42.148578 2015-08-26 12:26:31.783160 +2037-04-04 07:50:56.898 2055-05-28 08:12:48.819271 2068-12-26 06:58:49.635722 +2041-09-02 04:07:24.891 2051-08-01 11:15:40.218654 2081-10-19 12:55:40.057084 +1976-07-15 20:59:41.974 2075-01-29 17:34:10.425321 1996-12-31 07:51:28.562331 +1974-11-03 05:09:51.992 2010-04-19 00:09:03.451487 1994-05-15 11:42:53.162162 +2061-10-11 17:14:02.729 1981-07-22 06:13:45.729103 2084-05-27 05:59:37.746021 +1989-12-12 23:01:16.532 1992-10-05 04:07:57.973222 2037-10-24 15:53:50.985504 +1992-12-28 09:26:04.030 1971-07-29 06:20:38.230976 1980-03-26 15:49:55.428516 +2051-12-11 07:09:13.162 1982-01-12 00:25:45.754492 2010-05-17 07:01:28.452864 Date32 1934-01-06 2039-08-16 @@ -225,25 +225,25 @@ U6 \'%Y~t9 RL,{Xs\\tw - -[] -27467.1221 ('2021-03-08 03:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') -[] 204013.7193 ('2026-05-05 05:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') -[-122] -9432.2617 ('2001-08-23 08:05:41.222','f7bf2154-78c3-8920-e4d3-a374e22998a4') -[-30,61] -133488.2399 ('2048-05-14 09:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') -[-1] 
58720.0591 ('1976-06-07 23:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') -[1] -18736.7874 ('1977-03-10 04:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') -[34,-10] -99367.9009 ('2031-05-08 10:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') -[110] 31562.7502 ('2045-02-27 11:46:14.976','74116384-cb3e-eb00-0102-fb30ddea5d5f') -[114] -84125.1554 ('2023-06-06 06:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') -[124] -114719.5228 ('2010-11-11 22:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') +[] -27467.1221 ('2021-03-08 00:39:14.331','08ec773f-cded-8c46-727f-954768082cbf') +[] 204013.7193 ('2026-05-05 02:20:23.160','30f6d580-cb25-8d4f-f869-fc10128b3389') +[-122] -9432.2617 ('2001-08-23 04:05:41.222','f7bf2154-78c3-8920-e4d3-a374e22998a4') +[-30,61] -133488.2399 ('2048-05-14 06:05:06.021','a6af106c-b321-978b-fa79-338c9e342b5a') +[-1] 58720.0591 ('1976-06-07 20:26:18.162','fc038af0-ba31-8fdc-1847-37328ef161b0') +[1] -18736.7874 ('1977-03-10 01:41:16.215','3259d377-a92d-3557-9045-4ad1294d55d5') +[34,-10] -99367.9009 ('2031-05-08 07:00:41.084','0b38ebc5-20a6-be3d-8543-23ce3546f49c') +[110] 31562.7502 ('2045-02-27 08:46:14.976','74116384-cb3e-eb00-0102-fb30ddea5d5f') +[114] -84125.1554 ('2023-06-06 03:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') +[124] -114719.5228 ('2010-11-11 19:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') - -[] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1984-12-06','2028-08-17 06:05:01','2036-04-02 23:52:28.468','4b3d498c-dd44-95c1-5b75-921504ec5d8d') F743 -[-102,-118] 392272782 Eb -14818.02 o -2.664492247169164e59 ('2082-12-26','2052-09-09 06:50:50','2088-04-21 05:07:08.245','aeb9c26e-0ee7-2b8e-802b-2a96319b8e60') CBF4 -[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('2108-04-19','2090-07-31 16:45:26','2076-07-10 09:11:06.385','57c69bc6-dddd-0975-e932-a7b5173a1304') EB1D -[-28,100] 3675466147 { -146685.1749 h 3.6676044396877755e142 ('2017-10-25','2100-02-28 18:07:18','2055-10-14 
06:36:20.056','14949dae-dfa8-a124-af83-887348b2f609') 6D88 -[-23] 2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2141-04-06','2074-08-10 06:25:12','1976-12-04 18:31:55.745','86a9b3c1-4593-4d56-7762-3aa1dd22cbbf') AD43 -[11,-36] 3308237300 \N 171205.1896 \N 5.634708707075817e195 ('1974-10-31','1993-12-24 09:38:45','2038-07-15 05:22:51.805','63d999b8-8cca-e237-c4a4-4dd7d0096f65') 609E -[39] 1614362420 `4A8P 157144.063 o -1.1843143253872814e-255 ('2147-08-18','2072-09-28 18:27:27','2073-07-10 12:19:58.146','6483f5c0-8733-364c-4fa0-9948d32e8903') A886 -[48,-120] 3848918261 1 Date: Fri, 27 May 2022 02:20:16 +0200 Subject: [PATCH 568/615] Fix more tests --- .../0_stateless/00945_bloom_filter_index.sql | 18 +++++++++--------- .../01414_low_cardinality_nullable.sql | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index c06f8e87173..fc18a4a4dc5 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -42,7 +42,7 @@ SELECT COUNT() FROM bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_r SELECT COUNT() FROM bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 
12; @@ -68,7 +68,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -83,7 +83,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -98,7 +98,7 @@ SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(date_time, 
toDateTime('1970-01-01 02:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -121,7 +121,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE u64 = 1 SETTINGS max_rows SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul') SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; @@ -219,7 +219,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 1); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-02')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '1'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('1', 5)); @@ -234,7 +234,7 @@ SELECT COUNT() FROM 
bloom_filter_array_lc_null_types_test WHERE has(u64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 5); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-06')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:05', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '5'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('5', 5)); @@ -249,7 +249,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 10); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:00:10', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '10'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('10', 5)); @@ -279,7 +279,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(u64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f32, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(f64, 100); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-04-11')); -SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, 
toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); +SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 02:01:40', 'Asia/Istanbul')); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(str, '100'); SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5)); diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index 871d74d7fb9..2d3d31e9b5c 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -140,7 +140,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 1); SELECT count() FROM lc_nullable WHERE has(f32, 1); SELECT count() FROM lc_nullable WHERE has(f64, 1); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-02')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:00:01', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '1'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('1', 5)); @@ -168,7 +168,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 5); SELECT count() FROM lc_nullable WHERE has(f32, 5); SELECT count() FROM lc_nullable WHERE has(f64, 5); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-06')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:00:05', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '5'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('5', 5)); @@ -183,7 +183,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 10); SELECT count() FROM lc_nullable WHERE has(f32, 10); SELECT 
count() FROM lc_nullable WHERE has(f64, 10); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-01-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:00:10', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '10'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('10', 5)); @@ -213,7 +213,7 @@ SELECT count() FROM lc_nullable WHERE has(u64, 100); SELECT count() FROM lc_nullable WHERE has(f32, 100); SELECT count() FROM lc_nullable WHERE has(f64, 100); SELECT count() FROM lc_nullable WHERE has(date, toDate('1970-04-11')); -SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Asia/Istanbul')); +SELECT count() FROM lc_nullable WHERE has(date_time, toDateTime('1970-01-01 02:01:40', 'Asia/Istanbul')); SELECT count() FROM lc_nullable WHERE has(str, '100'); SELECT count() FROM lc_nullable WHERE has(fixed_string, toFixedString('100', 5)); From 3a9239b79f0434209f96816fd37c289d991c839f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 27 May 2022 04:05:32 +0200 Subject: [PATCH 569/615] Revert "RFC: Fix converting types for UNION queries (may produce LOGICAL_ERROR)" --- .../InterpreterSelectWithUnionQuery.cpp | 4 -- src/Interpreters/SelectQueryOptions.h | 8 ---- src/Interpreters/TreeRewriter.cpp | 30 +------------ .../02227_union_match_by_name.reference | 44 ------------------- .../0_stateless/02227_union_match_by_name.sql | 3 -- 5 files changed, 2 insertions(+), 87 deletions(-) delete mode 100644 tests/queries/0_stateless/02227_union_match_by_name.reference delete mode 100644 tests/queries/0_stateless/02227_union_match_by_name.sql diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 94ebfd73513..7506c3013cb 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -46,10 +46,6 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( if (!num_children) throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); - /// This is required for UNION to match headers correctly. - if (num_children > 1) - options.reorderColumns(); - /// Note that we pass 'required_result_column_names' to first SELECT. /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, /// because names could be different. diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index b0183e2761b..31ed9d8c686 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -31,8 +31,6 @@ struct SelectQueryOptions bool only_analyze = false; bool modify_inplace = false; bool remove_duplicates = false; - /// This is required for UNION to match headers correctly. - bool reorder_columns_as_required_header = false; bool ignore_quota = false; bool ignore_limits = false; /// This flag is needed to analyze query ignoring table projections. @@ -99,12 +97,6 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & reorderColumns(bool value = true) - { - reorder_columns_as_required_header = value; - return *this; - } - SelectQueryOptions & noSubquery() { subquery_depth = 0; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 11a392f3adf..c90421d6f4f 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -422,7 +422,7 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query) /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. /// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible. 
/// Also remove all INTERPOLATE columns which are not in SELECT anymore. -void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header) +void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups) { ASTs & elements = select_query->select()->children; @@ -453,29 +453,6 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const NameSet remove_columns; - /// Resort columns according to required_result_columns. - if (reorder_columns_as_required_header && !required_result_columns.empty()) - { - std::unordered_map name_pos; - { - size_t pos = 0; - for (const auto & name : required_result_columns) - name_pos[name] = pos++; - } - std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs) - { - String lhs_name = lhs->getAliasOrColumnName(); - String rhs_name = rhs->getAliasOrColumnName(); - size_t lhs_pos = name_pos.size(); - size_t rhs_pos = name_pos.size(); - if (auto it = name_pos.find(lhs_name); it != name_pos.end()) - lhs_pos = it->second; - if (auto it = name_pos.find(rhs_name); it != name_pos.end()) - rhs_pos = it->second; - return lhs_pos < rhs_pos; - }); - } - for (const auto & elem : elements) { String name = elem->getAliasOrColumnName(); @@ -488,8 +465,6 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const } else if (select_query->distinct || hasArrayJoin(elem)) { - /// ARRAY JOIN cannot be optimized out since it may change number of rows, - /// so as DISTINCT. 
new_elements.push_back(elem); } else @@ -1160,7 +1135,6 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( size_t subquery_depth = select_options.subquery_depth; bool remove_duplicates = select_options.remove_duplicates; - bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header; const auto & settings = getContext()->getSettingsRef(); @@ -1212,7 +1186,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. - removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header); + removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); /// Executing scalar subqueries - replacing them with constant values. 
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze); diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference deleted file mode 100644 index 72c4987a3d2..00000000000 --- a/tests/queries/0_stateless/02227_union_match_by_name.reference +++ /dev/null @@ -1,44 +0,0 @@ --- { echo } -EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); -Expression (Projection) -Header: avgWeighted(x, y) Nullable(Float64) - Expression (Before ORDER BY) - Header: avgWeighted(x, y) Nullable(Float64) - Aggregating - Header: avgWeighted(x, y) Nullable(Float64) - Expression (Before GROUP BY) - Header: x Nullable(UInt8) - y UInt8 - Union - Header: x Nullable(UInt8) - y UInt8 - Expression (Conversion before UNION) - Header: x Nullable(UInt8) - y UInt8 - Expression (Projection) - Header: x UInt8 - y UInt8 - Expression (Before ORDER BY) - Header: 255 UInt8 - 1 UInt8 - dummy UInt8 - SettingQuotaAndLimits (Set limits and quota after reading from storage) - Header: dummy UInt8 - ReadFromStorage (SystemOne) - Header: dummy UInt8 - Expression (Conversion before UNION) - Header: x Nullable(UInt8) - y UInt8 - Expression (Projection) - Header: x Nullable(Nothing) - y UInt8 - Expression (Before ORDER BY) - Header: NULL Nullable(Nothing) - 1 UInt8 - dummy UInt8 - SettingQuotaAndLimits (Set limits and quota after reading from storage) - Header: dummy UInt8 - ReadFromStorage (SystemOne) - Header: dummy UInt8 -SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); -255 diff --git a/tests/queries/0_stateless/02227_union_match_by_name.sql b/tests/queries/0_stateless/02227_union_match_by_name.sql deleted file mode 100644 index cc0ab8ba5aa..00000000000 --- a/tests/queries/0_stateless/02227_union_match_by_name.sql +++ /dev/null @@ -1,3 
+0,0 @@ --- { echo } -EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); -SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); From 393846e1eed2c90ff98134254b94a3721642a73b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 04:08:04 +0200 Subject: [PATCH 570/615] Fix test --- tests/queries/0_stateless/01699_timezoneOffset.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference index 860829f0ce6..a1cc6391e6f 100644 --- a/tests/queries/0_stateless/01699_timezoneOffset.reference +++ b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -1,4 +1,4 @@ -DST boundary test for Asia/Istanbul: +DST boundary test for Europe/Moscow: 0 1981-04-01 22:40:00 14400 354998400 1 1981-04-01 22:50:00 14400 354999000 2 1981-04-01 23:00:00 14400 354999600 @@ -70,7 +70,7 @@ DST boundary test for Australia/Lord_Howe: 15 2019-04-07 03:00:00 37800 1554568200 16 2019-04-07 03:10:00 37800 1554568800 17 2019-04-07 03:20:00 37800 1554569400 -4 days test in batch comparing with manually computation result for Asia/Istanbul: +4 days test in batch comparing with manually computation result for Europe/Moscow: 4 days test in batch comparing with manually computation result for Asia/Tehran: 4 days test in batch comparing with manually computation result for Australia/Lord_Howe Moscow DST Years: From 841858ec303abeec27c86282dccc010cfceaa1ff Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 27 May 2022 13:13:36 +0200 Subject: [PATCH 571/615] Revert "Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part"" --- src/Storages/MergeTree/DataPartsExchange.cpp | 41 +++++++++++++------- 1 file changed, 26 insertions(+), 15 
deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 620466b8035..f6d53979663 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -470,29 +470,28 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - PooledReadWriteBufferFromHTTP in{ + std::unique_ptr in = std::make_unique( uri, Poco::Net::HTTPRequest::HTTP_POST, - {}, + nullptr, timeouts, creds, DBMS_DEFAULT_BUFFER_SIZE, 0, /* no redirects */ - data_settings->replicated_max_parallel_fetches_for_host - }; + static_cast(data_settings->replicated_max_parallel_fetches_for_host)); - int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); + int server_protocol_version = parse(in->getResponseCookie("server_protocol_version", "0")); ReservationPtr reservation; size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) { - readBinary(sum_files_size, in); + readBinary(sum_files_size, *in); if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { IMergeTreeDataPart::TTLInfos ttl_infos; String ttl_infos_string; - readBinary(ttl_infos_string, in); + readBinary(ttl_infos_string, *in); ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); @@ -529,13 +528,13 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( String part_type = "Wide"; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, in); + readStringBinary(part_type, *in); UUID part_uuid = UUIDHelpers::Nil; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) - readUUIDText(part_uuid, in); + readUUIDText(part_uuid, *in); - String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); + String 
remote_fs_metadata = parse(in->getResponseCookie("remote_fs_metadata", "")); if (!remote_fs_metadata.empty()) { if (!try_zero_copy) @@ -549,7 +548,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( try { - return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, *in, throttler); } catch (const Exception & e) { @@ -557,6 +556,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( throw; LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy.")); + + /// It's important to release session from HTTP pool. Otherwise it's possible to get deadlock + /// on http pool. + try + { + in.reset(); + } + catch (...) + { + tryLogCurrentException(log); + } + /// Try again but without zero-copy return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); @@ -570,16 +581,16 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( part_info.partition_id, part_name, new_part_path, replica_path, uri, to_detached, sum_files_size); - in.setNextCallback(ReplicatedFetchReadCallback(*entry)); + in->setNextCallback(ReplicatedFetchReadCallback(*entry)); size_t projections = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - readBinary(projections, in); + readBinary(projections, *in); MergeTreeData::DataPart::Checksums checksums; return part_type == "InMemory" - ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); + ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, *in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, *in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( From 461bb42fb8fd79146224a6ba8cd59e619a57ea6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 27 May 2022 13:40:07 +0200 Subject: [PATCH 572/615] Fix flaky test --- .../test_replicated_merge_tree_hdfs_zero_copy/test.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 23f465eaabd..7d65bed3901 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -4,6 +4,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry from pyhdfs import HdfsClient @@ -264,8 +265,8 @@ def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): node1.query("OPTIMIZE TABLE ttl_move_test FINAL") node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) - assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" - assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" + assert_eq_with_retry(node1, "SELECT count() FROM ttl_move_test", "2") + assert_eq_with_retry(node2, "SELECT count() FROM ttl_move_test", "2") assert ( node1.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") == "(10),(11)" @@ -299,8 +300,9 @@ def test_hdfs_zero_copy_with_ttl_delete(cluster): node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) - assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" - assert node2.query("SELECT count() FROM 
ttl_delete_test FORMAT Values") == "(1)" + assert_eq_with_retry(node1, "SELECT count() FROM ttl_delete_test", "1") + assert_eq_with_retry(node2, "SELECT count() FROM ttl_delete_test", "1") + assert ( node1.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") == "(11)" From c79600c4c8568e5bedd772e4d2a682d1082a4d59 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 27 May 2022 13:44:29 +0200 Subject: [PATCH 573/615] Fix build --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f6d53979663..d6acf909c1e 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -519,7 +519,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (!disk) disk = reservation->getDisk(); - UInt64 revision = parse(in.getResponseCookie("disk_revision", "0")); + UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); if (revision) disk->syncRevision(revision); From d68c30a92e7d4714c6f2ba39284bc7f8967a050e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 27 May 2022 12:27:25 +0000 Subject: [PATCH 574/615] fix tests --- tests/queries/0_stateless/02306_part_types_profile_events.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02306_part_types_profile_events.sql b/tests/queries/0_stateless/02306_part_types_profile_events.sql index fd6178941f2..4b13504612e 100644 --- a/tests/queries/0_stateless/02306_part_types_profile_events.sql +++ b/tests/queries/0_stateless/02306_part_types_profile_events.sql @@ -29,7 +29,7 @@ SELECT count(), sum(ProfileEvents['InsertedWideParts']), sum(ProfileEvents['Inse AND query ILIKE 'INSERT INTO%' AND type = 'QueryFinish'; SELECT count(), sum(ProfileEvents['MergedIntoWideParts']), sum(ProfileEvents['MergedIntoCompactParts']) - FROM system.query_log WHERE has(databases, 
currentDatabase()) + FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02306_part_types_profile_events' AND query ILIKE 'OPTIMIZE TABLE%' AND type = 'QueryFinish'; @@ -39,6 +39,6 @@ SELECT part_type FROM system.part_log WHERE database = currentDatabase() SELECT part_type, count() > 0 FROM system.part_log WHERE database = currentDatabase() AND table = 't_parts_profile_events' AND event_type = 'MergeParts' - GROUP BY part_type; + GROUP BY part_type ORDER BY part_type; DROP TABLE t_parts_profile_events; From 735d2dfebd390c42a27554f85d9ccc87768ffc94 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 27 May 2022 13:55:20 +0300 Subject: [PATCH 575/615] tests: fix 01317_no_password_in_command_line flakiness (and make it race free) Before it was possible not to check if the query was gone already, also it checks all processes not only the process of the client for the possible password. v2: make it parallel aware Signed-off-by: Azat Khuzhin --- ...1317_no_password_in_command_line.reference | 2 - .../01317_no_password_in_command_line.sh | 72 ++++++++----------- 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/tests/queries/0_stateless/01317_no_password_in_command_line.reference b/tests/queries/0_stateless/01317_no_password_in_command_line.reference index aa47d0d46d4..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01317_no_password_in_command_line.reference +++ b/tests/queries/0_stateless/01317_no_password_in_command_line.reference @@ -1,2 +0,0 @@ -0 -0 diff --git a/tests/queries/0_stateless/01317_no_password_in_command_line.sh b/tests/queries/0_stateless/01317_no_password_in_command_line.sh index c9886aca31e..5b95f077ea2 100755 --- a/tests/queries/0_stateless/01317_no_password_in_command_line.sh +++ b/tests/queries/0_stateless/01317_no_password_in_command_line.sh @@ -7,52 +7,40 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS user" -$CLICKHOUSE_CLIENT 
--query "CREATE USER user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" - -# False positive result due to race condition with sleeps is Ok. - -$CLICKHOUSE_CLIENT --user user --password hello --query "SELECT sleep(1)" & -bg_query=$! +user=user_$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS $user" +$CLICKHOUSE_CLIENT --query "CREATE USER $user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" +trap '$CLICKHOUSE_CLIENT --query "DROP USER $user"' EXIT # Wait for query to start executing. At that time, the password should be cleared. -for _ in {1..20} -do - if $CLICKHOUSE_CLIENT --query "SHOW PROCESSLIST" | grep -q 'SELECT sleep(1)' - then - break - fi +function wait_query_pid() +{ + local query_id=$1 && shift - if ! kill -0 -- $bg_query 2>/dev/null - then - # The SELECT sleep(1) query finished earlier that we could grep for it in the process list, but it should have run for at least one second. It is Ok. - break - fi -done + for _ in {1..20}; do + if [ "$($CLICKHOUSE_CLIENT --param_query_id "$query_id" --query "SELECT count() FROM system.processes WHERE query_id = {query_id:String}")" -eq 1 ]; then + break + fi + sleep 0.3 + done +} -ps auxw | grep -F -- '--password' | grep -F hello ||: -wait - -# Once again with different syntax -$CLICKHOUSE_CLIENT --user user --password=hello --query "SELECT sleep(1)" & +# --password +query_id=first-$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query_id "$query_id" --user "$user" --password hello --max_block_size 1 --query "SELECT sleepEachRow(1) FROM system.numbers LIMIT 100" >& /dev/null & bg_query=$! - -# Wait for query to start executing. At that time, the password should be cleared. -for _ in {1..20} -do - if $CLICKHOUSE_CLIENT --query "SHOW PROCESSLIST" | grep -q 'SELECT sleep(1)' - then - break - fi - - if ! kill -0 -- $bg_query 2>/dev/null - then - # The SELECT sleep(1) query finished earlier that we could grep for it in the process list, but it should have run for at least one second. 
It is Ok. - break - fi -done - -ps auxw | grep -F -- '--password' | grep -F hello ||: +wait_query_pid "$query_id" +ps u --no-header $bg_query | grep -F -- '--password' | grep -F hello ||: +grep -F -- '--password' < "/proc/$bg_query/comm" | grep -F hello ||: +$CLICKHOUSE_CLIENT --format Null --param_query_id "$query_id" -q "KILL QUERY WHERE query_id = {query_id:String} SYNC" wait -$CLICKHOUSE_CLIENT --query "DROP USER user" +# --password= +query_id=second-$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query_id "$query_id" --user "$user" --password=hello --max_block_size 1 --query "SELECT sleepEachRow(1) FROM system.numbers LIMIT 100" >& /dev/null & +bg_query=$! +wait_query_pid "$query_id" +ps u --no-header $bg_query | grep -F -- '--password' | grep -F hello ||: +grep -F -- '--password' < "/proc/$bg_query/comm" | grep -F hello ||: +$CLICKHOUSE_CLIENT --format Null --param_query_id "$query_id" -q "KILL QUERY WHERE query_id = {query_id:String} SYNC" +wait From f8762667b0d799e5068dfe15737eb6b0b3a9f8d5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 12:37:11 +0000 Subject: [PATCH 576/615] Use jepsen CI directly in PR workflow --- .github/workflows/jepsen.yml | 5 +---- .github/workflows/pull_request.yml | 8 ++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 9b7c4e63d48..1682cd1e812 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -7,11 +7,8 @@ concurrency: on: # yamllint disable-line rule:truthy schedule: - cron: '0 */6 * * *' - workflow_run: - workflows: ["PullRequestCI"] - types: - - completed workflow_dispatch: + workflow_call: jobs: KeeperJepsenRelease: runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 01490dff59e..01fbcd42559 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3272,6 +3272,13 @@ jobs: # 
shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" +############################################################################################# +###################################### JEPSEN TESTS ######################################### +############################################################################################# + Jepsen: + needs: [BuilderBinRelease] + uses: ./.github/workflows/jepsen.yml + FinishCheck: needs: - StyleCheck @@ -3336,6 +3343,7 @@ jobs: - SplitBuildSmokeTest - CompatibilityCheck - IntegrationTestsFlakyCheck + - Jepsen runs-on: [self-hosted, style-checker] steps: - name: Clear repository From fc3d39629e66cf0f32f5f8281fae0bd9578016bb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 12:48:11 +0000 Subject: [PATCH 577/615] Temporarly remove dependancy on FastTest --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 01fbcd42559..149a9981203 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -298,7 +298,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" BuilderBinRelease: - needs: [DockerHubPush, FastTest] + needs: [DockerHubPush] runs-on: [self-hosted, builder] steps: - name: Set envs From 30f2c9ad58ef235542391a7b84c12b8d468a9f56 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 13:31:21 +0000 Subject: [PATCH 578/615] Polish workflow --- .github/workflows/pull_request.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 149a9981203..0c339c5ed4b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -298,7 +298,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" BuilderBinRelease: - needs: [DockerHubPush] + needs: 
[DockerHubPush, FastTest] runs-on: [self-hosted, builder] steps: - name: Set envs @@ -3276,8 +3276,8 @@ jobs: ###################################### JEPSEN TESTS ######################################### ############################################################################################# Jepsen: - needs: [BuilderBinRelease] - uses: ./.github/workflows/jepsen.yml + needs: [BuilderBinRelease] + uses: ./.github/workflows/jepsen.yml # yamllint disable-line FinishCheck: needs: From 2ca5b7812d3719c016141135a37a4a4bfad0736a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 27 May 2022 14:15:34 +0000 Subject: [PATCH 579/615] Ignore reusable workflow error --- .github/workflows/pull_request.yml | 2 +- utils/check-style/check-workflows | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0c339c5ed4b..6d56d5e3105 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3277,7 +3277,7 @@ jobs: ############################################################################################# Jepsen: needs: [BuilderBinRelease] - uses: ./.github/workflows/jepsen.yml # yamllint disable-line + uses: ./.github/workflows/jepsen.yml FinishCheck: needs: diff --git a/utils/check-style/check-workflows b/utils/check-style/check-workflows index 6e9cb87ed36..df2292d84ca 100755 --- a/utils/check-style/check-workflows +++ b/utils/check-style/check-workflows @@ -6,4 +6,4 @@ GIT_ROOT=$(git rev-parse --show-cdup) GIT_ROOT=${GIT_ROOT:-.} act --list --directory="$GIT_ROOT" 1>/dev/null 2>&1 || act --list --directory="$GIT_ROOT" 2>&1 -actionlint || : +actionlint -ignore 'reusable workflow call.+' || : From 540353566cb0bdbb954a195a2484d0e0f65fe5aa Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 27 May 2022 15:14:10 +0200 Subject: [PATCH 580/615] Added LpNorm and LpDistance functions for arrays --- 
src/Functions/array/arrayDistance.cpp | 126 +++++++++++++++--- src/Functions/array/arrayNorm.cpp | 114 ++++++++++++---- src/Functions/vectorFunctions.cpp | 36 ++++- .../02282_array_distance.reference | 4 + .../0_stateless/02282_array_distance.sql | 6 + .../0_stateless/02283_array_norm.reference | 27 ++-- .../queries/0_stateless/02283_array_norm.sql | 16 ++- 7 files changed, 262 insertions(+), 67 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 7c1cddf4435..2121189dacb 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -7,12 +7,13 @@ #include #include #include -#include "base/range.h" +#include namespace DB { namespace ErrorCodes { + extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; @@ -22,6 +23,8 @@ struct L1Distance { static inline String name = "L1"; + struct ConstParams {}; + template struct State { @@ -29,13 +32,13 @@ struct L1Distance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.sum += fabs(x - y); } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return state.sum; } @@ -45,6 +48,8 @@ struct L2Distance { static inline String name = "L2"; + struct ConstParams {}; + template struct State { @@ -52,22 +57,53 @@ struct L2Distance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.sum += (x - y) * (x - y); } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return sqrt(state.sum); } }; +struct LpDistance +{ + static inline String name = 
"Lp"; + + struct ConstParams + { + Float64 power; + Float64 inverted_power; + }; + + template + struct State + { + FloatType sum = 0; + }; + + template + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams & params) + { + state.sum += std::pow(fabs(x - y), params.power); + } + + template + static ResultType finalize(const State & state, const ConstParams & params) + { + return std::pow(state.sum, params.inverted_power); + } +}; + struct LinfDistance { static inline String name = "Linf"; + struct ConstParams {}; + template struct State { @@ -75,21 +111,24 @@ struct LinfDistance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.dist = fmax(state.dist, fabs(x - y)); } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return state.dist; } }; + struct CosineDistance { static inline String name = "Cosine"; + struct ConstParams {}; + template struct State { @@ -99,7 +138,7 @@ struct CosineDistance }; template - static void accumulate(State & state, ResultType x, ResultType y) + static void accumulate(State & state, ResultType x, ResultType y, const ConstParams &) { state.dot_prod += x * y; state.x_squared += x * x; @@ -107,7 +146,7 @@ struct CosineDistance } template - static ResultType finalize(const State & state) + static ResultType finalize(const State & state, const ConstParams &) { return 1 - state.dot_prod / sqrt(state.x_squared * state.y_squared); } @@ -121,17 +160,18 @@ public: String getName() const override { return name; } static FunctionPtr create(ContextPtr) { return std::make_shared>(); } size_t getNumberOfArguments() const override { return 2; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) 
const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { DataTypes types; - for (const auto & argument : arguments) + for (size_t i = 0; i < 2; ++i) { - const auto * array_type = checkAndGetDataType(argument.type.get()); + const auto * array_type = checkAndGetDataType(arguments[i].type.get()); if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be array.", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array.", i, getName()); types.push_back(array_type->getNestedType()); } @@ -221,7 +261,7 @@ private: { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithTypes(arguments[0].column, arguments[1].column, input_rows_count); \ + return executeWithTypes(arguments[0].column, arguments[1].column, input_rows_count, arguments); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -237,15 +277,15 @@ private: } template - ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const + ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { if (typeid_cast(col_x.get())) { - return executeWithTypesFirstArgConst(col_x, col_y, input_rows_count); + return executeWithTypesFirstArgConst(col_x, col_y, input_rows_count, arguments); } else if (typeid_cast(col_y.get())) { - return executeWithTypesFirstArgConst(col_y, col_x, input_rows_count); + return executeWithTypesFirstArgConst(col_y, col_x, input_rows_count, arguments); } col_x = col_x->convertToFullColumnIfConst(); @@ -273,6 +313,8 @@ private: } } + const typename Kernel::ConstParams kernel_params = initConstParams(arguments); + auto result = ColumnVector::create(input_rows_count); auto & result_data = result->getData(); @@ -284,9 +326,9 @@ private: typename Kernel::template State 
state; for (; prev < off; ++prev) { - Kernel::template accumulate(state, data_x[prev], data_y[prev]); + Kernel::template accumulate(state, data_x[prev], data_y[prev], kernel_params); } - result_data[row] = Kernel::finalize(state); + result_data[row] = Kernel::finalize(state, kernel_params); row++; } return result; @@ -294,7 +336,7 @@ private: /// Special case when the 1st parameter is Const template - ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const + ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { col_x = assert_cast(col_x.get())->getDataColumnPtr(); col_y = col_y->convertToFullColumnIfConst(); @@ -322,6 +364,8 @@ private: prev_offset = offsets_y[row]; } + const typename Kernel::ConstParams kernel_params = initConstParams(arguments); + auto result = ColumnVector::create(input_rows_count); auto & result_data = result->getData(); @@ -333,19 +377,59 @@ private: typename Kernel::template State state; for (size_t i = 0; prev < off; ++i, ++prev) { - Kernel::template accumulate(state, data_x[i], data_y[prev]); + Kernel::template accumulate(state, data_x[i], data_y[prev], kernel_params); } - result_data[row] = Kernel::finalize(state); + result_data[row] = Kernel::finalize(state, kernel_params); row++; } return result; } + typename Kernel::ConstParams initConstParams(const ColumnsWithTypeAndName &) const { return {}; } }; + +template <> +size_t FunctionArrayDistance::getNumberOfArguments() const { return 3; } + +template <> +ColumnNumbers FunctionArrayDistance::getArgumentsThatAreAlwaysConstant() const { return {2}; } + +template <> +LpDistance::ConstParams FunctionArrayDistance::initConstParams(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.size() < 3) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Argument p of function {} was not provided", + getName()); + + if 
(!arguments[2].column->isNumeric()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument p of function {} must be numeric constant", + getName()); + + if (!isColumnConst(*arguments[2].column) && arguments[2].column->size() != 1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Second argument for function {} must be either constant Float64 or constant UInt", + getName()); + + Float64 p = arguments[2].column->getFloat64(0); + if (p < 1 || p == HUGE_VAL) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Second argument for function {} must be not less than one and not be an infinity", + getName()); + + return LpDistance::ConstParams{p, 1 / p}; +} + /// These functions are used by TupleOrArrayFunction FunctionPtr createFunctionArrayL1Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayL2Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } +FunctionPtr createFunctionArrayLpDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index b3b5aff7063..20807b4a487 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -13,6 +13,7 @@ namespace DB { namespace ErrorCodes { + extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; } @@ -21,14 +22,16 @@ struct L1Norm { static inline String name = "L1"; + struct ConstParams {}; + template - inline static ResultType accumulate(ResultType result, ResultType value) + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + fabs(value); } 
template - inline static ResultType finalize(ResultType result) + inline static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -38,32 +41,59 @@ struct L2Norm { static inline String name = "L2"; + struct ConstParams {}; + template - inline static ResultType accumulate(ResultType result, ResultType value) + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + value * value; } template - inline static ResultType finalize(ResultType result) + inline static ResultType finalize(ResultType result, const ConstParams &) { return sqrt(result); } }; +struct LpNorm +{ + static inline String name = "Lp"; + + struct ConstParams + { + Float64 power; + Float64 inverted_power = 1 / power; + }; + + template + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) + { + return result + std::pow(fabs(value), params.power); + } + + template + inline static ResultType finalize(ResultType result, const ConstParams & params) + { + return std::pow(result, params.inverted_power); + } +}; + struct LinfNorm { static inline String name = "Linf"; + struct ConstParams {}; + template - inline static ResultType accumulate(ResultType result, ResultType value) + inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return fmax(result, fabs(value)); } template - inline static ResultType finalize(ResultType result) + inline static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -78,22 +108,17 @@ public: String getName() const override { return name; } static FunctionPtr create(ContextPtr) { return std::make_shared>(); } size_t getNumberOfArguments() const override { return 1; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool 
useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - DataTypes types; - for (const auto & argument : arguments) - { - const auto * array_type = checkAndGetDataType(argument.type.get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be array.", getName()); + const auto * array_type = checkAndGetDataType(arguments[0].type.get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be array.", getName()); - types.push_back(array_type->getNestedType()); - } - const auto & common_type = getLeastSupertype(types); - switch (common_type->getTypeId()) + switch (array_type->getNestedType()->getTypeId()) { case TypeIndex::UInt8: case TypeIndex::UInt16: @@ -111,7 +136,7 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} has nested type {}. " "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - getName(), common_type->getName()); + getName(), array_type->getNestedType()->getName()); } } @@ -125,7 +150,7 @@ public: switch (result_type->getTypeId()) { case TypeIndex::Float64: - return executeWithResultType(*arr, type, input_rows_count); + return executeWithResultType(*arr, type, input_rows_count, arguments); break; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); @@ -148,13 +173,13 @@ private: template - ColumnPtr executeWithResultType(const ColumnArray & array, const DataTypePtr & nested_type, size_t input_rows_count) const + ColumnPtr executeWithResultType(const ColumnArray & array, const DataTypePtr & nested_type, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { switch (nested_type->getTypeId()) { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithTypes(array, input_rows_count); \ + return 
executeWithTypes(array, input_rows_count, arguments); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -170,7 +195,7 @@ private: } template - static ColumnPtr executeWithTypes(const ColumnArray & array, size_t input_rows_count) + ColumnPtr executeWithTypes(const ColumnArray & array, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { const auto & data = typeid_cast &>(array.getData()).getData(); const auto & offsets = array.getOffsets(); @@ -178,6 +203,8 @@ private: auto result_col = ColumnVector::create(input_rows_count); auto & result_data = result_col->getData(); + const typename Kernel::ConstParams kernel_params = initConstParams(arguments); + ColumnArray::Offset prev = 0; size_t row = 0; for (auto off : offsets) @@ -185,18 +212,59 @@ private: Float64 result = 0; for (; prev < off; ++prev) { - result = Kernel::template accumulate(result, data[prev]); + result = Kernel::template accumulate(result, data[prev], kernel_params); } - result_data[row] = Kernel::finalize(result); + result_data[row] = Kernel::finalize(result, kernel_params); row++; } return result_col; } + + typename Kernel::ConstParams initConstParams(const ColumnsWithTypeAndName &) const { return {}; } }; +template <> +size_t FunctionArrayNorm::getNumberOfArguments() const { return 2; } + +template <> +ColumnNumbers FunctionArrayNorm::getArgumentsThatAreAlwaysConstant() const { return {1}; } + +template <> +LpNorm::ConstParams FunctionArrayNorm::initConstParams(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.size() < 2) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Argument p of function {} was not provided", + getName()); + + if (!arguments[1].column->isNumeric()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument p of function {} must be numeric constant", + getName()); + + if (!isColumnConst(*arguments[1].column) && arguments[1].column->size() != 1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Second argument for function {} must be 
either constant Float64 or constant UInt", + getName()); + + Float64 p = arguments[1].column->getFloat64(0); + if (p < 1 || p == HUGE_VAL) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Second argument for function {} must be not less than one and not be an infinity", + getName()); + + return LpNorm::ConstParams{p, 1 / p}; +} + + /// These functions are used by TupleOrArrayFunction FunctionPtr createFunctionArrayL1Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayL2Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } +FunctionPtr createFunctionArrayLpNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } } diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 2c29db81dd6..ee271a67f07 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -810,12 +810,14 @@ public: const auto & p_column = arguments[1]; - const auto * p_column_const = assert_cast(p_column.column.get()); + if (!isColumnConst(*p_column.column) && p_column.column->size() != 1) + throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; + double p; - if (isFloat(p_column_const->getDataType())) - p = p_column_const->getFloat64(0); - else if (isUnsignedInteger(p_column_const->getDataType())) - p = p_column_const->getUInt(0); + if (isFloat(p_column.column->getDataType())) + p = p_column.column->getFloat64(0); + else if (isUnsignedInteger(p_column.column->getDataType())) + p = p_column.column->getUInt(0); else throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; @@ -1109,10 +1111,12 @@ private: extern FunctionPtr createFunctionArrayL1Norm(ContextPtr 
context_); extern FunctionPtr createFunctionArrayL2Norm(ContextPtr context_); +extern FunctionPtr createFunctionArrayLpNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayL1Distance(ContextPtr context_); extern FunctionPtr createFunctionArrayL2Distance(ContextPtr context_); +extern FunctionPtr createFunctionArrayLpDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_); @@ -1132,6 +1136,14 @@ struct L2NormTraits static constexpr auto CreateArrayFunction = createFunctionArrayL2Norm; }; +struct LpNormTraits +{ + static inline String name = "LpNorm"; + + static constexpr auto CreateTupleFunction = FunctionLpNorm::create; + static constexpr auto CreateArrayFunction = createFunctionArrayLpNorm; +}; + struct LinfNormTraits { static inline String name = "LinfNorm"; @@ -1156,6 +1168,14 @@ struct L2DistanceTraits static constexpr auto CreateArrayFunction = createFunctionArrayL2Distance; }; +struct LpDistanceTraits +{ + static inline String name = "LpDistance"; + + static constexpr auto CreateTupleFunction = FunctionLpDistance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayLpDistance; +}; + struct LinfDistanceTraits { static inline String name = "LinfDistance"; @@ -1174,10 +1194,12 @@ struct CosineDistanceTraits using TupleOrArrayFunctionL1Norm = TupleOrArrayFunction; using TupleOrArrayFunctionL2Norm = TupleOrArrayFunction; +using TupleOrArrayFunctionLpNorm = TupleOrArrayFunction; using TupleOrArrayFunctionLinfNorm = TupleOrArrayFunction; using TupleOrArrayFunctionL1Distance = TupleOrArrayFunction; using TupleOrArrayFunctionL2Distance = TupleOrArrayFunction; +using TupleOrArrayFunctionLpDistance = TupleOrArrayFunction; using TupleOrArrayFunctionLinfDistance = TupleOrArrayFunction; using TupleOrArrayFunctionCosineDistance = TupleOrArrayFunction; @@ 
-1200,7 +1222,7 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); factory.registerAlias("normL1", TupleOrArrayFunctionL1Norm::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normL2", TupleOrArrayFunctionL2Norm::name, FunctionFactory::CaseInsensitive); @@ -1210,7 +1232,7 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); factory.registerAlias("distanceL1", FunctionL1Distance::name, FunctionFactory::CaseInsensitive); factory.registerAlias("distanceL2", FunctionL2Distance::name, FunctionFactory::CaseInsensitive); diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference index b7db2dceee8..ebce2788fe9 100644 --- a/tests/queries/0_stateless/02282_array_distance.reference +++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -1,5 +1,6 @@ 6 3.7416573867739413 +3.2071843327373397 3 0.00258509695694209 \N @@ -11,6 +12,9 @@ nan 7.0710678118654755 9.16515138991168 12.12435565298214 +5.917593844525055 +8.308858759453505 +9.932246380845738 2 5 4 diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql index 246b16daf65..75e4b0d653e 100644 --- a/tests/queries/0_stateless/02282_array_distance.sql +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -1,5 +1,6 @@ SELECT L1Distance([0, 0, 0], [1, 2, 3]); SELECT L2Distance([1, 2, 3], [0, 0, 0]); +SELECT LpDistance([1, 2, 3], [0, 0, 0], 3.5); SELECT LinfDistance([1, 2, 3], [0, 0, 0]); SELECT cosineDistance([1, 2, 3], [3, 5, 7]); @@ -26,6 +27,7 @@ CREATE TABLE vec2d (id UInt64, v Array(Float64)) ENGINE = Memory; INSERT INTO vec1 VALUES (1, [3, 4, 5]), (2, [2, 4, 8]), (3, 
[7, 7, 7]); SELECT L1Distance(v, [0, 0, 0]) FROM vec1; SELECT L2Distance(v, [0, 0, 0]) FROM vec1; +SELECT LpDistance(v, [0, 0, 0], 3.14) FROM vec1; SELECT LinfDistance([5, 4, 3], v) FROM vec1; SELECT cosineDistance([3, 2, 1], v) FROM vec1; SELECT LinfDistance(v, materialize([0, -2, 0])) FROM vec1; @@ -42,6 +44,10 @@ SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2d v2; SELECT L1Distance([0, 0], [1]); -- { serverError 190 } SELECT L2Distance([1, 2], (3,4)); -- { serverError 43 } +SELECT LpDistance([1, 2], [3,4]); -- { serverError 42 } +SELECT LpDistance([1, 2], [3,4], -1.); -- { serverError 69 } +SELECT LpDistance([1, 2], [3,4], 'aaa'); -- { serverError 43 } +SELECT LpDistance([1, 2], [3,4], materialize(2.7)); -- { serverError 44 } DROP TABLE vec1; DROP TABLE vec2; diff --git a/tests/queries/0_stateless/02283_array_norm.reference b/tests/queries/0_stateless/02283_array_norm.reference index 68dbce0b436..ebaadee321f 100644 --- a/tests/queries/0_stateless/02283_array_norm.reference +++ b/tests/queries/0_stateless/02283_array_norm.reference @@ -1,27 +1,28 @@ 6 7.0710678118654755 +10.882246697870885 2 -10803059573 4234902446.7343364 2096941042 -1 5 -2 2 -3 5.196152422706632 -4 0 +10803059573 4234902446.7343364 10803059573 4234902446.7343364 3122003357.3280888 2096941042 +1 7 5 4.601724723020627 4 +2 2 2 2 2 +3 9 5.196152422706632 4.506432087111623 3 +4 0 0 0 0 1 11 2 11 3 11 4 11 -1 5 -2 2 -3 5.196152422706632 -4 0 +1 7 5 4.601724723020627 4 +2 2 2 2 2 +3 9 5.196152422706632 4.506432087111623 3 +4 0 0 0 0 1 11 2 11 3 11 4 11 -1 5 -2 2 -3 5.196152422706632 -4 0 +1 7 5 4.601724723020627 4 +2 2 2 2 2 +3 9 5.196152422706632 4.506432087111623 3 +4 0 0 0 0 1 11 2 11 3 11 diff --git a/tests/queries/0_stateless/02283_array_norm.sql b/tests/queries/0_stateless/02283_array_norm.sql index 8408eea3f8b..6938618d633 100644 --- a/tests/queries/0_stateless/02283_array_norm.sql +++ b/tests/queries/0_stateless/02283_array_norm.sql @@ -1,5 +1,6 @@ SELECT L1Norm([1, 2, 
3]); SELECT L2Norm([3., 4., 5.]); +SELECT LpNorm([3., 4., 5.], 1.1); SELECT LinfNorm([0, 0, 2]); -- Overflows @@ -7,6 +8,9 @@ WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -57341255 SELECT L1Norm(a), L2Norm(a), + LpNorm(a,1), + LpNorm(a,2), + LpNorm(a,3.14), LinfNorm(a); DROP TABLE IF EXISTS vec1; @@ -19,17 +23,23 @@ INSERT INTO vec1 VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); INSERT INTO vec1f VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); INSERT INTO vec1d VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); -SELECT id, L2Norm(v) FROM vec1; +SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1; -SELECT id, L2Norm(v) FROM vec1f; +SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1f; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1f; -SELECT id, L2Norm(v) FROM vec1d; +SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1d; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1d; SELECT L1Norm(1, 2); -- { serverError 42 } +SELECT LpNorm([1,2]); -- { serverError 42 } +SELECT LpNorm([1,2], -3.4); -- { serverError 69 } +SELECT LpNorm([1,2], 'aa'); -- { serverError 43 } +SELECT LpNorm([1,2], [1]); -- { serverError 43 } +SELECT LpNorm([1,2], materialize(3.14)); -- { serverError 44 } + DROP TABLE vec1; DROP TABLE vec1f; DROP TABLE vec1d; From 8099361cbc7ced80c6e2d72e88d2fab3d80795d6 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 27 May 2022 17:48:14 +0200 Subject: [PATCH 581/615] Update FileCache.cpp --- src/Common/FileCache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 6c76bf5c0b3..efb2f29d274 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -1053,7 +1053,8 @@ void LRUFileCache::assertCacheCellsCorrectness( if (file_segment->reserved_size != 0) { 
assert(cell.queue_iterator); - assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock)); + /// FIXME: this is too slow, need to make it O(1) + /// assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock)); } } } From 6361c5f38c9893345d10b2e9a4cd27aecc335777 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 27 May 2022 18:22:16 +0200 Subject: [PATCH 582/615] Fix for failed style check --- src/Functions/array/arrayDistance.cpp | 1 + src/Functions/array/arrayNorm.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 2121189dacb..d5359572437 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -17,6 +17,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; } struct L1Distance diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 20807b4a487..805368be5ee 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; + extern const int ARGUMENT_OUT_OF_BOUND; } struct L1Norm From 9b1b30855c6513dd49bb9ab53c48f21f54537c5e Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 27 May 2022 18:25:11 +0200 Subject: [PATCH 583/615] Fixed check for HUGE_VAL --- src/Functions/array/arrayDistance.cpp | 2 +- src/Functions/array/arrayNorm.cpp | 2 +- src/Functions/vectorFunctions.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index d5359572437..3f7900b6c62 100644 --- 
a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -418,7 +418,7 @@ LpDistance::ConstParams FunctionArrayDistance::initConstParams(const getName()); Float64 p = arguments[2].column->getFloat64(0); - if (p < 1 || p == HUGE_VAL) + if (p < 1 || p >= HUGE_VAL) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument for function {} must be not less than one and not be an infinity", diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 805368be5ee..2142abc4c90 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -252,7 +252,7 @@ LpNorm::ConstParams FunctionArrayNorm::initConstParams(const ColumnsWith getName()); Float64 p = arguments[1].column->getFloat64(0); - if (p < 1 || p == HUGE_VAL) + if (p < 1 || p >= HUGE_VAL) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument for function {} must be not less than one and not be an infinity", diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index ee271a67f07..411b30040cc 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -821,7 +821,7 @@ public: else throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; - if (p < 1 || p == HUGE_VAL) + if (p < 1 || p >= HUGE_VAL) throw Exception{"Second argument for function " + getName() + " must be not less than one and not be an infinity", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; auto abs = FunctionFactory::instance().get("abs", context); From a061acadbec41568eee13d7548a1b3b197e13dac Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 27 May 2022 11:04:29 -0700 Subject: [PATCH 584/615] Remove std::move from trivially-copyable object --- src/Dictionaries/FlatDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/FlatDictionary.cpp 
b/src/Dictionaries/FlatDictionary.cpp index d0d9fba763c..bd664224d41 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -643,7 +643,7 @@ void registerDictionaryFlat(DictionaryFactory & factory) const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - return std::make_unique(dict_id, dict_struct, std::move(source_ptr), std::move(configuration)); + return std::make_unique(dict_id, dict_struct, std::move(source_ptr), configuration); }; factory.registerLayout("flat", create_layout, false); From fa31d758d6603e5187f187930c14572fb155becc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 22:22:59 +0200 Subject: [PATCH 585/615] Update tests --- .../0_stateless/00189_time_zones_long.reference | 12 ++++++------ .../0_stateless/00735_long_conditional.reference | 4 ++-- .../0_stateless/00900_long_parquet_load.reference | 4 ++-- .../0_stateless/01098_msgpack_format.reference | 4 ++-- .../0_stateless/01307_orc_output_format.reference | 4 ++-- .../0_stateless/01905_to_json_string.reference | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/00189_time_zones_long.reference b/tests/queries/0_stateless/00189_time_zones_long.reference index c55542f59a6..e53ec7ca815 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.reference +++ b/tests/queries/0_stateless/00189_time_zones_long.reference @@ -10,7 +10,7 @@ toStartOfDay 2014-09-30 00:00:00 2014-09-30 00:00:00 toMonday -2014-12-29 +2014-12-22 2014-12-22 2014-12-22 2014-12-29 @@ -54,7 +54,7 @@ toStartOfYear 2014-01-01 2014-01-01 toTime -1970-01-02 12:00:00 1970-01-02 12:00:00 +1970-01-02 11:00:00 1970-01-02 12:00:00 1970-01-02 10:00:00 1970-01-02 11:00:00 1970-01-02 09:00:00 1970-01-02 10:00:00 1970-01-02 18:00:00 1970-01-02 18:00:00 @@ -84,7 +84,7 @@ toDayOfWeek 3 2 toHour -23 +22 21 20 4 @@ -236,10 +236,10 @@ toString 2015-07-15 02:30:00 toUnixTimestamp 1426415400 +1426419000 1426422600 -1426426200 
-1426393800 -1426455000 +1426390200 +1426451400 1426415400 1426415400 1426415400 diff --git a/tests/queries/0_stateless/00735_long_conditional.reference b/tests/queries/0_stateless/00735_long_conditional.reference index f6c06e64066..05383d6e1f0 100644 --- a/tests/queries/0_stateless/00735_long_conditional.reference +++ b/tests/queries/0_stateless/00735_long_conditional.reference @@ -94,7 +94,7 @@ value vs value 1970-01-01 1970-01-02 1970-01-02 Date Date Date 2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +1970-01-01 02:00:00 1970-01-01 02:00:01 1970-01-01 02:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') column vs value 0 1 1 Int8 Int8 Int8 0 1 1 Int8 Int16 Int16 @@ -191,4 +191,4 @@ column vs value 1970-01-01 1970-01-02 1970-01-02 Date Date Date 2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Asia/Istanbul\') Date DateTime(\'Asia/Istanbul\') -1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') +1970-01-01 02:00:00 1970-01-01 02:00:01 1970-01-01 02:00:01 DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') DateTime(\'Asia/Istanbul\') diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index b295a226853..72ec99ad2c6 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -3,8 +3,8 @@ 1 0 1 1 1 10 1.1 10.1 01/01/09 1 1230768060 
=== Try load data from alltypes_list.parquet [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] -[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] -[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['2000-01-01 00:00:00','2001-01-01 00:00:00','2002-01-01 00:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] +[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['1999-12-31 23:00:00','2000-12-31 23:00:00','2001-12-31 23:00:00'] [0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] +[1,-2,3] [1,2,3] [100,-200,300] [100,200,300] [10000000,-20000000,30000000] [10000000,2000000,3000000] [100000000000000,-200000000000,3000000000000] [100000000000000,20000000000000,3000000000000] ['Some string','Some string','Some string'] ['0000','1111','2222'] [42.42,424.2,0.4242] [424242.424242,4242042420.242424,42] ['2000-01-01','2001-01-01','2002-01-01'] ['1999-12-31 23:00:00','2000-12-31 23:00:00','2001-12-31 23:00:00'] 
[0.2,10,4] [4,10000.1,10000.1] [1000000000,90,101001.01] === Try load data from alltypes_plain.parquet 4 1 0 0 0 0 0 0 03/01/09 0 1235865600 5 0 1 1 1 10 1.1 10.1 03/01/09 1 1235865660 diff --git a/tests/queries/0_stateless/01098_msgpack_format.reference b/tests/queries/0_stateless/01098_msgpack_format.reference index 384852f24a7..cfe3501cb88 100644 --- a/tests/queries/0_stateless/01098_msgpack_format.reference +++ b/tests/queries/0_stateless/01098_msgpack_format.reference @@ -1,9 +1,9 @@ 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 [1,2,3,4,5] 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 [5,4,3,2,1] -42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 02:00:42 1970-01-01 02:00:00.042 [42] 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 [1,2,3,4,5] 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 [5,4,3,2,1] -42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 02:00:42 1970-01-01 02:00:00.042 [42] [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] [0,1,2,3,42,253,254,255] diff --git a/tests/queries/0_stateless/01307_orc_output_format.reference b/tests/queries/0_stateless/01307_orc_output_format.reference index e185c02a3e5..657d28b3093 100644 --- a/tests/queries/0_stateless/01307_orc_output_format.reference +++ 
b/tests/queries/0_stateless/01307_orc_output_format.reference @@ -1,6 +1,6 @@ 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.00000001 100000.00000000000001 1 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.123123123 123123123.123123123123123 \N -42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.42 42.42424242 424242.42424242424242 42 +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 02:00:42 42.42 42.42424242 424242.42424242424242 42 255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.00000001 100000.00000000000001 1 4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.123123123 123123123.123123123123123 \N -42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.42 42.42424242 424242.42424242424242 42 +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 02:00:42 42.42 42.42424242 424242.42424242424242 42 diff --git a/tests/queries/0_stateless/01905_to_json_string.reference b/tests/queries/0_stateless/01905_to_json_string.reference index e669022f208..33d435f8e1a 100644 --- a/tests/queries/0_stateless/01905_to_json_string.reference +++ b/tests/queries/0_stateless/01905_to_json_string.reference @@ -1,3 +1,3 @@ -[] 2947817982 "&" -69802.9769 "w" -1.9158530982937093e25 ["2003-05-15","1988-03-19 07:13:49","2090-04-14 03:58:26.029","91943d2e-480d-66b5-ee4c-1b5bb8eb7256"] "O" [] +[] 2947817982 "&" -69802.9769 "w" -1.9158530982937093e25 ["2003-05-15","1988-03-19 06:13:49","2090-04-14 03:58:26.029","91943d2e-480d-66b5-ee4c-1b5bb8eb7256"] "O" [] [-115] 481807067 ",{MM" -170235.0663 "o" 3.3808659558052087e155 
["2055-01-12","2070-08-09 03:49:21","2068-11-30 09:36:49.672","20b0e7b5-ad0e-177b-3054-c779b2a8ebe0"] "I\\u001C" ["e57178f9-4d10-2fa1-7c2d-53c5a65c3463"] {"1234":"5678"} From 10c97164677843da83c540c846d08b5948593f9f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 27 May 2022 22:48:07 +0200 Subject: [PATCH 586/615] Fix clang-tidy --- src/Common/FileCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index f66287b805f..3962679770b 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -301,7 +301,7 @@ private: size_t getFileSegmentsNumUnlocked(std::lock_guard & cache_lock) const; - void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard & cache_lock); + static void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard & cache_lock); public: String dumpStructure(const Key & key_) override; From c50791dd3babb806626fe2229364f8161b787600 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 22:51:37 +0200 Subject: [PATCH 587/615] Fix clang-tidy-14, part 1 --- src/Client/ClientBase.cpp | 7 ++++--- src/Common/ThreadFuzzer.cpp | 8 ++++++-- src/Common/filesystemHelpers.cpp | 3 +-- src/Daemon/BaseDaemon.cpp | 8 ++++---- src/Databases/MySQL/DatabaseMySQL.cpp | 16 ++++++++-------- src/Dictionaries/MySQLDictionarySource.cpp | 16 +++++++--------- src/Dictionaries/RangeHashedDictionary.cpp | 8 ++++---- src/Formats/FormatFactory.cpp | 5 ++--- src/Functions/CRC.cpp | 2 +- src/Functions/FunctionsBinaryRepresentation.cpp | 2 +- src/Functions/FunctionsJSON.cpp | 2 +- src/Functions/URL/port.cpp | 3 +-- src/Functions/pointInPolygon.cpp | 2 +- src/Interpreters/DatabaseCatalog.cpp | 2 +- .../TranslateQualifiedNamesVisitor.cpp | 2 +- src/Processors/Executors/PollingQueue.cpp | 2 +- src/Server/MySQLHandlerFactory.cpp | 11 +++++++++-- 
src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 4 ++-- 20 files changed, 57 insertions(+), 50 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 9cc31df0b43..d678441d442 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -285,11 +285,11 @@ void ClientBase::setupSignalHandler() sigemptyset(&new_act.sa_mask); #else if (sigemptyset(&new_act.sa_mask)) - throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); #endif if (sigaction(SIGINT, &new_act, nullptr)) - throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); } @@ -492,7 +492,8 @@ try String pager = config().getString("pager", ""); if (!pager.empty()) { - signal(SIGPIPE, SIG_IGN); + if (SIG_ERR == signal(SIGPIPE, SIG_IGN)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); ShellCommand::Config config(pager); config.pipe_stdin_only = true; diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 9d07edeb502..962cfee074d 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -1,3 +1,5 @@ +// NOLINTBEGIN(readability-inconsistent-declaration-parameter-name) + #include #include #if defined(OS_LINUX) @@ -292,8 +294,8 @@ void ThreadFuzzer::setup() const #if THREAD_FUZZER_WRAP_PTHREAD # define MAKE_WRAPPER(RET, NAME, ...) 
\ - extern "C" RET __##NAME(__VA_ARGS__); /* NOLINT */ \ - extern "C" RET NAME(__VA_ARGS__) /* NOLINT */ \ + extern "C" RET __##NAME(__VA_ARGS__); \ + extern "C" RET NAME(__VA_ARGS__) \ { \ injection( \ NAME##_before_yield_probability.load(std::memory_order_relaxed), \ @@ -317,3 +319,5 @@ FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER) # undef MAKE_WRAPPER #endif } + +// NOLINTEND(readability-inconsistent-declaration-parameter-name) diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index ca06b21ab3a..6d9e0859692 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -1,10 +1,8 @@ #include "filesystemHelpers.h" -#include #if defined(__linux__) # include # include -# include # include #endif #include @@ -13,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f7cfbab289a..c5341fb0ac1 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -76,7 +76,8 @@ DB::PipeFDs signal_pipe; */ static void call_default_signal_handler(int sig) { - signal(sig, SIG_DFL); + if (SIG_ERR == signal(sig, SIG_DFL)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); raise(sig); } @@ -498,9 +499,8 @@ BaseDaemon::~BaseDaemon() signal_listener_thread.join(); /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. 
for (int sig : handled_signals) - { - signal(sig, SIG_DFL); - } + if (SIG_ERR == signal(sig, SIG_DFL)) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); signal_pipe.close(); } diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 279867542e2..446518be5cd 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -89,7 +89,7 @@ bool DatabaseMySQL::empty() const return true; for (const auto & [table_name, storage_info] : local_tables_cache) - if (!remove_or_detach_tables.count(table_name)) + if (!remove_or_detach_tables.contains(table_name)) return false; return true; @@ -103,7 +103,7 @@ DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_cont fetchTablesIntoLocalCache(local_context); for (const auto & [table_name, modify_time_and_storage] : local_tables_cache) - if (!remove_or_detach_tables.count(table_name) && (!filter_by_table_name || filter_by_table_name(table_name))) + if (!remove_or_detach_tables.contains(table_name) && (!filter_by_table_name || filter_by_table_name(table_name))) tables[table_name] = modify_time_and_storage.second; return std::make_unique(tables, database_name); @@ -120,7 +120,7 @@ StoragePtr DatabaseMySQL::tryGetTable(const String & mysql_table_name, ContextPt fetchTablesIntoLocalCache(local_context); - if (!remove_or_detach_tables.count(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end()) + if (!remove_or_detach_tables.contains(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end()) return local_tables_cache[mysql_table_name].second; return StoragePtr{}; @@ -349,11 +349,11 @@ void DatabaseMySQL::attachTable(ContextPtr /* context_ */, const String & table_ { std::lock_guard lock{mutex}; - if (!local_tables_cache.count(table_name)) + if (!local_tables_cache.contains(table_name)) throw Exception("Cannot attach table " + 
backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " because it does not exist.", ErrorCodes::UNKNOWN_TABLE); - if (!remove_or_detach_tables.count(table_name)) + if (!remove_or_detach_tables.contains(table_name)) throw Exception("Cannot attach table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " because it already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); @@ -372,11 +372,11 @@ StoragePtr DatabaseMySQL::detachTable(ContextPtr /* context */, const String & t { std::lock_guard lock{mutex}; - if (remove_or_detach_tables.count(table_name)) + if (remove_or_detach_tables.contains(table_name)) throw Exception("Table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " is dropped", ErrorCodes::TABLE_IS_DROPPED); - if (!local_tables_cache.count(table_name)) + if (!local_tables_cache.contains(table_name)) throw Exception("Table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); @@ -412,7 +412,7 @@ void DatabaseMySQL::detachTablePermanently(ContextPtr, const String & table_name fs::path remove_flag = fs::path(getMetadataPath()) / (escapeForFileName(table_name) + suffix); - if (remove_or_detach_tables.count(table_name)) + if (remove_or_detach_tables.contains(table_name)) throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); if (fs::exists(remove_flag)) diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 57d61ce5724..22ca5a5b08c 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -16,6 +16,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include "readInvalidateQuery.h" namespace DB @@ -118,15 +125,6 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) #if USE_MYSQL -# include -# include -# 
include -# include -# include -# include -# include "readInvalidateQuery.h" -# include -# include namespace DB { diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 20230b1bd32..261e9166ec8 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -104,7 +104,7 @@ ColumnPtr RangeHashedDictionary::getColumn( /// Cast range column to storage type Columns modified_key_columns = key_columns; - auto range_storage_column = key_columns.back(); + const ColumnPtr & range_storage_column = key_columns.back(); ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); @@ -314,7 +314,7 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Colum } /// Cast range column to storage type - auto range_storage_column = key_columns.back(); + const ColumnPtr & range_storage_column = key_columns.back(); ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; auto range_column_updated = castColumnAccurate(column_to_cast, dict_struct.range_min->type); auto key_columns_copy = key_columns; @@ -513,7 +513,7 @@ void RangeHashedDictionary::getItemsImpl( size_t keys_found = 0; - auto range_column = key_columns.back(); + const ColumnPtr & range_column = key_columns.back(); auto key_columns_copy = key_columns; key_columns_copy.pop_back(); @@ -984,7 +984,7 @@ Pipe RangeHashedDictionary::read(const Names & column_names Columns result; result.reserve(attribute_names_size); - auto key_column = key_columns.back(); + const ColumnPtr & key_column = key_columns.back(); const auto * key_to_index_column = typeid_cast(key_column.get()); if (!key_to_index_column) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 644e4d3ecfd..6a7eb88bca1 100644 --- 
a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -492,10 +492,9 @@ String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_ String FormatFactory::getFormatFromFileDescriptor(int fd) { #ifdef OS_LINUX - char buf[32] = {'\0'}; - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd); + std::string proc_path = fmt::format("/proc/self/fd/{}", fd); char file_path[PATH_MAX] = {'\0'}; - if (readlink(buf, file_path, sizeof(file_path) - 1) != -1) + if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) return getFormatFromFileName(file_path, false); return ""; #elif defined(__APPLE__) diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index abcf137f2e7..b7c6c1195ea 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -118,7 +118,7 @@ struct CRCFunctionWrapper private: static ReturnType doCRC(const ColumnString::Chars & buf, size_t offset, size_t size) { - const unsigned char * p = reinterpret_cast(&buf[0]) + offset; + const unsigned char * p = reinterpret_cast(buf.data()) + offset; return Impl::makeCRC(p, size); } }; diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 582dd1f1049..a13558133d3 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -91,7 +91,7 @@ struct HexImpl out_vec.resize(size * hex_length); size_t pos = 0; - char * out = reinterpret_cast(&out_vec[0]); + char * out = reinterpret_cast(out_vec.data()); for (size_t i = 0; i < size; ++i) { const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index d2bcb646ecf..49546aac92b 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -123,7 +123,7 @@ public: bool document_ok = false; if (col_json_const) { - std::string_view json{reinterpret_cast(&chars[0]), offsets[0] - 1}; + std::string_view 
json{reinterpret_cast(chars.data()), offsets[0] - 1}; document_ok = parser.parse(json, document); } diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index afe27c9240c..e2dbc75ab3f 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -91,7 +91,7 @@ private: static UInt16 extractPort(UInt16 default_port, const ColumnString::Chars & buf, size_t offset, size_t size) { - const char * p = reinterpret_cast(&buf[0]) + offset; + const char * p = reinterpret_cast(buf.data()) + offset; const char * end = p + size; StringRef host = getURLHost(p, size); @@ -127,4 +127,3 @@ void registerFunctionPort(FunctionFactory & factory) } } - diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index c3a9c411cbc..7d2369fd5e7 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -167,7 +167,7 @@ public: const auto & tuple_columns = tuple_col->getColumns(); - const ColumnWithTypeAndName poly = arguments[1]; + const ColumnWithTypeAndName & poly = arguments[1]; const IColumn * poly_col = poly.column.get(); const ColumnConst * const_poly_col = checkAndGetColumn(poly_col); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 267564eb84c..2589df0986a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -647,7 +647,7 @@ std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForData { std::unique_lock lock(ddl_guards_mutex); db_guard_iter = ddl_guards.try_emplace(database).first; - assert(db_guard_iter->second.first.count("")); + assert(db_guard_iter->second.first.contains("")); } DatabaseGuard & db_guard = db_guard_iter->second; return std::unique_lock{db_guard.second}; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 35fb0828b3e..3129f9d7fe2 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ 
b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -253,7 +253,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { for (const auto & column : table.columns) { - if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name))) + if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.contains(column.name))) { addIdentifier(columns, table.table, column.name); } diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp index a601d426a5d..270f495a2f0 100644 --- a/src/Processors/Executors/PollingQueue.cpp +++ b/src/Processors/Executors/PollingQueue.cpp @@ -38,7 +38,7 @@ PollingQueue::~PollingQueue() void PollingQueue::addTask(size_t thread_number, void * data, int fd) { std::uintptr_t key = reinterpret_cast(data); - if (tasks.count(key)) + if (tasks.contains(key)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Task {} was already added to task queue", key); tasks[key] = TaskData{thread_number, data, fd}; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index 7263b234068..c02a3015945 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; extern const int NO_ELEMENTS_IN_CONFIG; extern const int OPENSSL_ERROR; } @@ -66,7 +67,10 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(certificate_file.data(), "r"); if (fp == nullptr) throw Exception("Cannot open certificate file: " + certificate_file + ".", ErrorCodes::CANNOT_OPEN_FILE); - SCOPE_EXIT(fclose(fp)); + SCOPE_EXIT( + if (0 != fclose(fp)) + throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); + ); X509 * x509 = PEM_read_X509(fp, nullptr, nullptr, nullptr); 
SCOPE_EXIT(X509_free(x509)); @@ -89,7 +93,10 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(private_key_file.data(), "r"); if (fp == nullptr) throw Exception ("Cannot open private key file " + private_key_file + ".", ErrorCodes::CANNOT_OPEN_FILE); - SCOPE_EXIT(fclose(fp)); + SCOPE_EXIT( + if (0 != fclose(fp)) + throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); + ); private_key.reset(PEM_read_RSAPrivateKey(fp, nullptr, nullptr, nullptr)); if (!private_key) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index be1c9ffd370..ca258da7b5d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3072,7 +3072,7 @@ void MergeTreeData::forgetPartAndMoveToDetached(const MergeTreeData::DataPartPtr throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); /// What if part_to_detach is a reference to *it_part? Make a new owner just in case. 
- DataPartPtr part = *it_part; + const DataPartPtr & part = *it_part; if (part->getState() == DataPartState::Active) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a8e4854f809..1866d65ccfd 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -840,7 +840,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { - Tokens tokens(&indices[0], &indices[indices.size()], settings.max_query_size); + Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size); IParser::Pos pos(tokens, settings.max_parser_depth); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index a19e3a778cd..e7882ce4952 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -319,12 +319,12 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) size_t columns_size; in >> columns_size >> "\n"; columns_str.resize(columns_size); - in.readStrict(&columns_str[0], columns_size); + in.readStrict(columns_str.data(), columns_size); in >> "\nmetadata_str_size:\n"; size_t metadata_size; in >> metadata_size >> "\n"; metadata_str.resize(metadata_size); - in.readStrict(&metadata_str[0], metadata_size); + in.readStrict(metadata_str.data(), metadata_size); } else if (type_str == "sync_pinned_part_uuids") { From 6c2699a991e0535f6962b322d22d2c0182ccd925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 23:00:23 +0200 Subject: [PATCH 588/615] Fix clang-tidy-14, part 1 --- src/Daemon/BaseDaemon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp 
b/src/Daemon/BaseDaemon.cpp index c5341fb0ac1..2bf699da9c7 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -77,7 +77,7 @@ DB::PipeFDs signal_pipe; static void call_default_signal_handler(int sig) { if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); raise(sig); } @@ -500,7 +500,7 @@ BaseDaemon::~BaseDaemon() /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. for (int sig : handled_signals) if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); signal_pipe.close(); } From d62c57be3fafdc9d9b12073e5f63ea69db6ff46b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 23:02:05 +0200 Subject: [PATCH 589/615] Fix clang-tidy-14, part 1 --- src/Daemon/BaseDaemon.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 2bf699da9c7..bb36e0eb1ea 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -68,6 +68,14 @@ namespace fs = std::filesystem; +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_SET_SIGNAL_HANDLER; + } +} + DB::PipeFDs signal_pipe; From d6597efc08ffac90d9e89cbda2b3e86d7e84dad4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 27 May 2022 23:03:16 +0200 Subject: [PATCH 590/615] Fix clang-tidy-14, part 1 --- src/Daemon/BaseDaemon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bb36e0eb1ea..1b1e4611dc2 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -85,7 +85,7 @@ DB::PipeFDs signal_pipe; static void call_default_signal_handler(int 
sig) { if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); raise(sig); } @@ -508,7 +508,7 @@ BaseDaemon::~BaseDaemon() /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. for (int sig : handled_signals) if (SIG_ERR == signal(sig, SIG_DFL)) - throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); + DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); signal_pipe.close(); } From f3e83cb222a9c155ac32d5601c7b4198c14004c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 28 May 2022 01:51:41 +0300 Subject: [PATCH 591/615] Update star-schema.md --- docs/en/getting-started/example-datasets/star-schema.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/getting-started/example-datasets/star-schema.md b/docs/en/getting-started/example-datasets/star-schema.md index 35ff492c360..ea855a664a9 100644 --- a/docs/en/getting-started/example-datasets/star-schema.md +++ b/docs/en/getting-started/example-datasets/star-schema.md @@ -26,7 +26,6 @@ $ ./dbgen -s 1000 -T c $ ./dbgen -s 1000 -T l $ ./dbgen -s 1000 -T p $ ./dbgen -s 1000 -T s -$ ./dbgen -s 1000 -T d ``` Creating tables in ClickHouse: From 39a55991ca49d6e5c809a38a490184c8b92b98fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 28 May 2022 01:18:07 +0200 Subject: [PATCH 592/615] Change Playground URL in the docs --- docs/en/development/contrib.md | 2 +- docs/en/getting-started/example-datasets/brown-benchmark.md | 2 +- docs/en/getting-started/example-datasets/cell-towers.md | 4 ++-- docs/en/getting-started/example-datasets/menus.md | 2 +- docs/en/getting-started/example-datasets/ontime.md | 2 +- docs/en/getting-started/example-datasets/opensky.md | 2 +- docs/en/getting-started/example-datasets/recipes.md | 2 +- 
docs/en/getting-started/example-datasets/uk-price-paid.md | 2 +- docs/ru/development/contrib.md | 2 +- docs/ru/getting-started/example-datasets/brown-benchmark.md | 3 +-- docs/ru/getting-started/example-datasets/cell-towers.md | 2 +- docs/ru/getting-started/example-datasets/recipes.md | 2 +- 12 files changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 0a254f8c8ae..21ec7cf635b 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -92,7 +92,7 @@ The list of third-party libraries can be obtained by the following query: SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -[Example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) +[Example](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) ## Adding new third-party libraries and maintaining patches in third-party libraries {#adding-third-party-libraries} diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md index 0960756dbe9..b8e6140c60f 100644 --- a/docs/en/getting-started/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -411,6 +411,6 @@ ORDER BY yr, mo; ``` -The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.com/play?user=play), 
[example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). +The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). 
[Original article](https://clickhouse.com/docs/en/getting_started/example_datasets/brown-benchmark/) diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index 7a35a28faa6..8da7761eea4 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -126,6 +126,6 @@ SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) ``` -The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.com/play?user=play), [example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). +The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). -Although you cannot create temporary tables there. \ No newline at end of file +Although you cannot create temporary tables there. diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md index c572dcdb491..fd20c75f707 100644 --- a/docs/en/getting-started/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -351,4 +351,4 @@ At least they have caviar with vodka. Very nice. 
## Online Playground {#playground} -The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==). +The data is uploaded to ClickHouse Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==). diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index aa181a7deff..4b24d8fd6e7 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -398,7 +398,7 @@ ORDER BY c DESC LIMIT 10; ``` -You can also play with the data in Playground, [example](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==). +You can also play with the data in Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==). This performance test was created by Vadim Tkachenko. 
See: diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index f55ebc79590..b38021c34eb 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -417,4 +417,4 @@ Result: ### Online Playground {#playground} -You can test other queries to this data set using the interactive resource [Online Playground](https://gh-api.clickhouse.com/play?user=play). For example, [like this](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here. +You can test other queries to this data set using the interactive resource [Online Playground](https://play.clickhouse.com/play?user=play). For example, [like this](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here. 
diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index 5b10c7c9c2c..37a6eeebea5 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -334,6 +334,6 @@ Result: ### Online Playground -The dataset is also available in the [Online Playground](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). +The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). [Original article](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/) diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index eaec6e53ed4..b7a486fb057 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -646,4 +646,4 @@ no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows ### Test It in Playground {#playground} -The dataset is also available in the [Online Playground](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). 
+The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md index b98ed847a0b..1b99ec97553 100644 --- a/docs/ru/development/contrib.md +++ b/docs/ru/development/contrib.md @@ -92,7 +92,7 @@ sidebar_label: "Используемые сторонние библиотеки SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -[Пример](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) +[Пример](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) ## Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений {#adding-third-party-libraries} diff --git a/docs/ru/getting-started/example-datasets/brown-benchmark.md b/docs/ru/getting-started/example-datasets/brown-benchmark.md index 8d2605f4a9f..8afda860b72 100644 --- a/docs/ru/getting-started/example-datasets/brown-benchmark.md +++ b/docs/ru/getting-started/example-datasets/brown-benchmark.md @@ -411,5 +411,4 @@ ORDER BY yr, mo; ``` -Данные также доступны для работы с интерактивными запросами через [Playground](https://gh-api.clickhouse.com/play?user=play), 
[пример](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). - +Данные также доступны для работы с интерактивными запросами через [Playground](https://play.clickhouse.com/play?user=play), [пример](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). 
diff --git a/docs/ru/getting-started/example-datasets/cell-towers.md b/docs/ru/getting-started/example-datasets/cell-towers.md index 254d53ad7e1..49174994c14 100644 --- a/docs/ru/getting-started/example-datasets/cell-towers.md +++ b/docs/ru/getting-started/example-datasets/cell-towers.md @@ -125,4 +125,4 @@ SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) ``` -Вы можете протестировать другие запросы с помощью интерактивного ресурса [Playground](https://gh-api.clickhouse.com/play?user=play). Например, [вот так](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). Однако, обратите внимание, что здесь нельзя создавать временные таблицы. +Вы можете протестировать другие запросы с помощью интерактивного ресурса [Playground](https://play.clickhouse.com/play?user=play). Например, [вот так](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). Однако, обратите внимание, что здесь нельзя создавать временные таблицы. diff --git a/docs/ru/getting-started/example-datasets/recipes.md b/docs/ru/getting-started/example-datasets/recipes.md index 08838f1c950..f3b4c8285d7 100644 --- a/docs/ru/getting-started/example-datasets/recipes.md +++ b/docs/ru/getting-started/example-datasets/recipes.md @@ -337,6 +337,6 @@ WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'; ### Online Playground -Этот набор данных доступен в [Online Playground](https://gh-api.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). 
+Этот набор данных доступен в [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). [Оригинальная статья](https://clickhouse.com/docs/ru/getting-started/example-datasets/recipes/) From f7a5b1fdafc3f83106f159d56e0d2677d7b02d2d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 28 May 2022 03:24:56 +0200 Subject: [PATCH 593/615] Remove margin in test reports and change the font --- docker/test/fuzzer/run-fuzzer.sh | 18 ++---------------- tests/ci/report.py | 17 ++--------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 32799a669eb..f186cf46adf 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -355,22 +355,9 @@ fi cat > report.html < -